From fc96e5c0536ae6d9d689a373b696f4fd3659f7d3 Mon Sep 17 00:00:00 2001 From: Zbigniew Jędrzejewski-Szmek Date: Wed, 5 May 2021 12:53:53 +0200 Subject: basic/escape: allow truncation mode where "…" is always appended MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So far we would append "…" or "..." only when the string was wider than the specified output width. Let's add a mode for callers that know the string being passed is already truncated, in which the ellipsis is always appended. The condition for jumping back in utf8_escape_non_printable_full() was off-by-one. But we only jumped to that label after doing a check with a stronger condition, so I think it didn't matter. Now it matters because we'd output the forced ellipsis one column too early. --- src/basic/escape.c | 16 ++++++++++++---- src/basic/escape.h | 1 + src/basic/utf8.c | 12 ++++++++---- src/basic/utf8.h | 4 ++-- src/test/test-escape.c | 13 +++++++++++-- src/test/test-utf8.c | 49 +++++++++++++++++++++++-------------------------- 6 files changed, 57 insertions(+), 38 deletions(-) diff --git a/src/basic/escape.c b/src/basic/escape.c index f579f15d87..2a3a0e31a1 100644 --- a/src/basic/escape.c +++ b/src/basic/escape.c @@ -368,7 +368,8 @@ char* xescape_full(const char *s, const char *bad, size_t console_width, XEscape * reversed with cunescape(). If XESCAPE_8_BIT is specified, characters >= 127 are let through * unchanged. This corresponds to non-ASCII printable characters in pre-unicode encodings. * - * If console_width is reached, output is truncated and "..." is appended. */ + * If console_width is reached, or XESCAPE_FORCE_ELLIPSIS is set, output is truncated and "..." is + * appended. 
*/ if (console_width == 0) return strdup(""); @@ -380,10 +381,15 @@ char* xescape_full(const char *s, const char *bad, size_t console_width, XEscape memset(ans, '_', MIN(strlen(s), console_width) * 4); ans[MIN(strlen(s), console_width) * 4] = 0; + bool force_ellipsis = FLAGS_SET(flags, XESCAPE_FORCE_ELLIPSIS); + for (f = s, t = prev = prev2 = ans; ; f++) { char *tmp_t = t; if (!*f) { + if (force_ellipsis) + break; + *t = 0; return ans; } @@ -391,7 +397,7 @@ char* xescape_full(const char *s, const char *bad, size_t console_width, XEscape if ((unsigned char) *f < ' ' || (!FLAGS_SET(flags, XESCAPE_8_BIT) && (unsigned char) *f >= 127) || *f == '\\' || strchr(bad, *f)) { - if ((size_t) (t - ans) + 4 > console_width) + if ((size_t) (t - ans) + 4 + 3 * force_ellipsis > console_width) break; *(t++) = '\\'; @@ -399,7 +405,7 @@ char* xescape_full(const char *s, const char *bad, size_t console_width, XEscape *(t++) = hexchar(*f >> 4); *(t++) = hexchar(*f); } else { - if ((size_t) (t - ans) + 1 > console_width) + if ((size_t) (t - ans) + 1 + 3 * force_ellipsis > console_width) break; *(t++) = *f; @@ -432,7 +438,9 @@ char* escape_non_printable_full(const char *str, size_t console_width, XEscapeFl if (FLAGS_SET(flags, XESCAPE_8_BIT)) return xescape_full(str, "", console_width, flags); else - return utf8_escape_non_printable_full(str, console_width); + return utf8_escape_non_printable_full(str, + console_width, + FLAGS_SET(flags, XESCAPE_FORCE_ELLIPSIS)); } char* octescape(const char *s, size_t len) { diff --git a/src/basic/escape.h b/src/basic/escape.h index 945e7dc82c..907b572bd4 100644 --- a/src/basic/escape.h +++ b/src/basic/escape.h @@ -56,6 +56,7 @@ int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit, typedef enum XEscapeFlags { XESCAPE_8_BIT = 1 << 0, + XESCAPE_FORCE_ELLIPSIS = 1 << 1, } XEscapeFlags; char* xescape_full(const char *s, const char *bad, size_t console_width, XEscapeFlags flags); diff --git a/src/basic/utf8.c b/src/basic/utf8.c 
index 244b8ade93..63fc9f71d1 100644 --- a/src/basic/utf8.c +++ b/src/basic/utf8.c @@ -212,7 +212,7 @@ static int utf8_char_console_width(const char *str) { return unichar_iswide(c) ? 2 : 1; } -char *utf8_escape_non_printable_full(const char *str, size_t console_width) { +char *utf8_escape_non_printable_full(const char *str, size_t console_width, bool force_ellipsis) { char *p, *s, *prev_s; size_t n = 0; /* estimated print width */ @@ -229,8 +229,12 @@ char *utf8_escape_non_printable_full(const char *str, size_t console_width) { int len; char *saved_s = s; - if (!*str) /* done! */ - goto finish; + if (!*str) { /* done! */ + if (force_ellipsis) + goto truncation; + else + goto finish; + } len = utf8_encoded_valid_unichar(str, SIZE_MAX); if (len > 0) { @@ -274,7 +278,7 @@ char *utf8_escape_non_printable_full(const char *str, size_t console_width) { truncation: /* Try to go back one if we don't have enough space for the ellipsis */ - if (n + 1 >= console_width) + if (n + 1 > console_width) s = prev_s; s = mempcpy(s, "…", strlen("…")); diff --git a/src/basic/utf8.h b/src/basic/utf8.h index 219ca89184..b0e969f655 100644 --- a/src/basic/utf8.h +++ b/src/basic/utf8.h @@ -25,9 +25,9 @@ bool utf8_is_printable_newline(const char* str, size_t length, bool allow_newlin #define utf8_is_printable(str, length) utf8_is_printable_newline(str, length, true) char *utf8_escape_invalid(const char *s); -char *utf8_escape_non_printable_full(const char *str, size_t console_width); +char *utf8_escape_non_printable_full(const char *str, size_t console_width, bool force_ellipsis); static inline char *utf8_escape_non_printable(const char *str) { - return utf8_escape_non_printable_full(str, SIZE_MAX); + return utf8_escape_non_printable_full(str, SIZE_MAX, false); } size_t utf8_encode_unichar(char *out_utf8, char32_t g); diff --git a/src/test/test-escape.c b/src/test/test-escape.c index 63f9306fb4..991b135a33 100644 --- a/src/test/test-escape.c +++ b/src/test/test-escape.c @@ -27,11 +27,11 @@ 
static void test_xescape_full(bool eight_bits) { XEscapeFlags flags = eight_bits * XESCAPE_8_BIT; for (unsigned i = 0; i < 60; i++) { - _cleanup_free_ char *t; + _cleanup_free_ char *t, *q; assert_se(t = xescape_full("abc\\\"\b\f\n\r\t\v\a\003\177\234\313", "b", i, flags)); - log_info("%02d: %s", i, t); + log_info("%02d: <%s>", i, t); if (i >= full_fit) assert_se(streq(t, escaped)); @@ -45,6 +45,15 @@ static void test_xescape_full(bool eight_bits) { assert_se(strlen(t) == i); assert_se(strneq(t, "...", i)); } + + assert_se(q = xescape_full("abc\\\"\b\f\n\r\t\v\a\003\177\234\313", "b", i, + flags | XESCAPE_FORCE_ELLIPSIS)); + + log_info("%02d: <%s>", i, q); + if (i > 0) + assert_se(endswith(q, ".")); + assert(strlen(q) <= i); + assert(strlen(q) + 3 >= strlen(t)); } } diff --git a/src/test/test-utf8.c b/src/test/test-utf8.c index 042b94634b..cdbdfcb054 100644 --- a/src/test/test-utf8.c +++ b/src/test/test-utf8.c @@ -136,32 +136,29 @@ static void test_utf8_escape_non_printable(void) { static void test_utf8_escape_non_printable_full(void) { log_info("/* %s */", __func__); - for (size_t i = 0; i < 20; i++) { - _cleanup_free_ char *p; - - p = utf8_escape_non_printable_full("goo goo goo", i); - puts(p); - assert_se(utf8_is_valid(p)); - assert_se(utf8_console_width(p) <= i); - } - - for (size_t i = 0; i < 20; i++) { - _cleanup_free_ char *p; - - p = utf8_escape_non_printable_full("\001 \019\20\a", i); - puts(p); - assert_se(utf8_is_valid(p)); - assert_se(utf8_console_width(p) <= i); - } - - for (size_t i = 0; i < 20; i++) { - _cleanup_free_ char *p; - - p = utf8_escape_non_printable_full("\xef\xbf\x30\x13", i); - puts(p); - assert_se(utf8_is_valid(p)); - assert_se(utf8_console_width(p) <= i); - } + const char *s; + FOREACH_STRING(s, + "goo goo goo", /* ASCII */ + "\001 \019\20\a", /* control characters */ + "\xef\xbf\x30\x13") /* misplaced continuation bytes followed by a digit and cc */ + for (size_t cw = 0; cw < 22; cw++) { + _cleanup_free_ char *p, *q; + size_t ew; + + 
p = utf8_escape_non_printable_full(s, cw, false); + ew = utf8_console_width(p); + log_debug("%02zu \"%s\" (%zu wasted)", cw, p, cw - ew); + assert_se(utf8_is_valid(p)); + assert_se(ew <= cw); + + q = utf8_escape_non_printable_full(s, cw, true); + ew = utf8_console_width(q); + log_debug(" \"%s\" (%zu wasted)", q, cw - ew); + assert_se(utf8_is_valid(q)); + assert_se(ew <= cw); + if (cw > 0) + assert_se(endswith(q, "…")); + } } static void test_utf16_to_utf8(void) { -- cgit v1.2.3