summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> 2021-05-05 12:53:53 +0200
committer: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> 2021-05-05 13:59:23 +0200
commit: fc96e5c0536ae6d9d689a373b696f4fd3659f7d3 (patch)
tree: dcd2abb7deafeeaff8efd704f4574737e47a2455
parent: basic/escape: flagsify xescape_full() (diff)
downloadsystemd-fc96e5c0536ae6d9d689a373b696f4fd3659f7d3.tar.xz
systemd-fc96e5c0536ae6d9d689a373b696f4fd3659f7d3.zip
basic/escape: allow truncation mode where "…" is always appended
So far we would append "…" or "..." only when the string was wider than the specified output width. Let's add a mode in which the ellipsis is always appended, for use when the caller knows that the string being passed is already truncated. The condition for jumping back in utf8_escape_non_printable_full() was off by one. But we only jumped to that label after doing a check with a stronger condition, so I think it didn't matter. Now it matters, because with the old condition we'd output the forced ellipsis one column too early.
-rw-r--r--src/basic/escape.c16
-rw-r--r--src/basic/escape.h1
-rw-r--r--src/basic/utf8.c12
-rw-r--r--src/basic/utf8.h4
-rw-r--r--src/test/test-escape.c13
-rw-r--r--src/test/test-utf8.c49
6 files changed, 57 insertions, 38 deletions
diff --git a/src/basic/escape.c b/src/basic/escape.c
index f579f15d87..2a3a0e31a1 100644
--- a/src/basic/escape.c
+++ b/src/basic/escape.c
@@ -368,7 +368,8 @@ char* xescape_full(const char *s, const char *bad, size_t console_width, XEscape
* reversed with cunescape(). If XESCAPE_8_BIT is specified, characters >= 127 are let through
* unchanged. This corresponds to non-ASCII printable characters in pre-unicode encodings.
*
- * If console_width is reached, output is truncated and "..." is appended. */
+ * If console_width is reached, or XESCAPE_FORCE_ELLIPSIS is set, output is truncated and "..." is
+ * appended. */
if (console_width == 0)
return strdup("");
@@ -380,10 +381,15 @@ char* xescape_full(const char *s, const char *bad, size_t console_width, XEscape
memset(ans, '_', MIN(strlen(s), console_width) * 4);
ans[MIN(strlen(s), console_width) * 4] = 0;
+ bool force_ellipsis = FLAGS_SET(flags, XESCAPE_FORCE_ELLIPSIS);
+
for (f = s, t = prev = prev2 = ans; ; f++) {
char *tmp_t = t;
if (!*f) {
+ if (force_ellipsis)
+ break;
+
*t = 0;
return ans;
}
@@ -391,7 +397,7 @@ char* xescape_full(const char *s, const char *bad, size_t console_width, XEscape
if ((unsigned char) *f < ' ' ||
(!FLAGS_SET(flags, XESCAPE_8_BIT) && (unsigned char) *f >= 127) ||
*f == '\\' || strchr(bad, *f)) {
- if ((size_t) (t - ans) + 4 > console_width)
+ if ((size_t) (t - ans) + 4 + 3 * force_ellipsis > console_width)
break;
*(t++) = '\\';
@@ -399,7 +405,7 @@ char* xescape_full(const char *s, const char *bad, size_t console_width, XEscape
*(t++) = hexchar(*f >> 4);
*(t++) = hexchar(*f);
} else {
- if ((size_t) (t - ans) + 1 > console_width)
+ if ((size_t) (t - ans) + 1 + 3 * force_ellipsis > console_width)
break;
*(t++) = *f;
@@ -432,7 +438,9 @@ char* escape_non_printable_full(const char *str, size_t console_width, XEscapeFl
if (FLAGS_SET(flags, XESCAPE_8_BIT))
return xescape_full(str, "", console_width, flags);
else
- return utf8_escape_non_printable_full(str, console_width);
+ return utf8_escape_non_printable_full(str,
+ console_width,
+ FLAGS_SET(flags, XESCAPE_FORCE_ELLIPSIS));
}
char* octescape(const char *s, size_t len) {
diff --git a/src/basic/escape.h b/src/basic/escape.h
index 945e7dc82c..907b572bd4 100644
--- a/src/basic/escape.h
+++ b/src/basic/escape.h
@@ -56,6 +56,7 @@ int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit,
typedef enum XEscapeFlags {
XESCAPE_8_BIT = 1 << 0,
+ XESCAPE_FORCE_ELLIPSIS = 1 << 1,
} XEscapeFlags;
char* xescape_full(const char *s, const char *bad, size_t console_width, XEscapeFlags flags);
diff --git a/src/basic/utf8.c b/src/basic/utf8.c
index 244b8ade93..63fc9f71d1 100644
--- a/src/basic/utf8.c
+++ b/src/basic/utf8.c
@@ -212,7 +212,7 @@ static int utf8_char_console_width(const char *str) {
return unichar_iswide(c) ? 2 : 1;
}
-char *utf8_escape_non_printable_full(const char *str, size_t console_width) {
+char *utf8_escape_non_printable_full(const char *str, size_t console_width, bool force_ellipsis) {
char *p, *s, *prev_s;
size_t n = 0; /* estimated print width */
@@ -229,8 +229,12 @@ char *utf8_escape_non_printable_full(const char *str, size_t console_width) {
int len;
char *saved_s = s;
- if (!*str) /* done! */
- goto finish;
+ if (!*str) { /* done! */
+ if (force_ellipsis)
+ goto truncation;
+ else
+ goto finish;
+ }
len = utf8_encoded_valid_unichar(str, SIZE_MAX);
if (len > 0) {
@@ -274,7 +278,7 @@ char *utf8_escape_non_printable_full(const char *str, size_t console_width) {
truncation:
/* Try to go back one if we don't have enough space for the ellipsis */
- if (n + 1 >= console_width)
+ if (n + 1 > console_width)
s = prev_s;
s = mempcpy(s, "…", strlen("…"));
diff --git a/src/basic/utf8.h b/src/basic/utf8.h
index 219ca89184..b0e969f655 100644
--- a/src/basic/utf8.h
+++ b/src/basic/utf8.h
@@ -25,9 +25,9 @@ bool utf8_is_printable_newline(const char* str, size_t length, bool allow_newlin
#define utf8_is_printable(str, length) utf8_is_printable_newline(str, length, true)
char *utf8_escape_invalid(const char *s);
-char *utf8_escape_non_printable_full(const char *str, size_t console_width);
+char *utf8_escape_non_printable_full(const char *str, size_t console_width, bool force_ellipsis);
static inline char *utf8_escape_non_printable(const char *str) {
- return utf8_escape_non_printable_full(str, SIZE_MAX);
+ return utf8_escape_non_printable_full(str, SIZE_MAX, false);
}
size_t utf8_encode_unichar(char *out_utf8, char32_t g);
diff --git a/src/test/test-escape.c b/src/test/test-escape.c
index 63f9306fb4..991b135a33 100644
--- a/src/test/test-escape.c
+++ b/src/test/test-escape.c
@@ -27,11 +27,11 @@ static void test_xescape_full(bool eight_bits) {
XEscapeFlags flags = eight_bits * XESCAPE_8_BIT;
for (unsigned i = 0; i < 60; i++) {
- _cleanup_free_ char *t;
+ _cleanup_free_ char *t, *q;
assert_se(t = xescape_full("abc\\\"\b\f\n\r\t\v\a\003\177\234\313", "b", i, flags));
- log_info("%02d: %s", i, t);
+ log_info("%02d: <%s>", i, t);
if (i >= full_fit)
assert_se(streq(t, escaped));
@@ -45,6 +45,15 @@ static void test_xescape_full(bool eight_bits) {
assert_se(strlen(t) == i);
assert_se(strneq(t, "...", i));
}
+
+ assert_se(q = xescape_full("abc\\\"\b\f\n\r\t\v\a\003\177\234\313", "b", i,
+ flags | XESCAPE_FORCE_ELLIPSIS));
+
+ log_info("%02d: <%s>", i, q);
+ if (i > 0)
+ assert_se(endswith(q, "."));
+ assert(strlen(q) <= i);
+ assert(strlen(q) + 3 >= strlen(t));
}
}
diff --git a/src/test/test-utf8.c b/src/test/test-utf8.c
index 042b94634b..cdbdfcb054 100644
--- a/src/test/test-utf8.c
+++ b/src/test/test-utf8.c
@@ -136,32 +136,29 @@ static void test_utf8_escape_non_printable(void) {
static void test_utf8_escape_non_printable_full(void) {
log_info("/* %s */", __func__);
- for (size_t i = 0; i < 20; i++) {
- _cleanup_free_ char *p;
-
- p = utf8_escape_non_printable_full("goo goo goo", i);
- puts(p);
- assert_se(utf8_is_valid(p));
- assert_se(utf8_console_width(p) <= i);
- }
-
- for (size_t i = 0; i < 20; i++) {
- _cleanup_free_ char *p;
-
- p = utf8_escape_non_printable_full("\001 \019\20\a", i);
- puts(p);
- assert_se(utf8_is_valid(p));
- assert_se(utf8_console_width(p) <= i);
- }
-
- for (size_t i = 0; i < 20; i++) {
- _cleanup_free_ char *p;
-
- p = utf8_escape_non_printable_full("\xef\xbf\x30\x13", i);
- puts(p);
- assert_se(utf8_is_valid(p));
- assert_se(utf8_console_width(p) <= i);
- }
+ const char *s;
+ FOREACH_STRING(s,
+ "goo goo goo", /* ASCII */
+ "\001 \019\20\a", /* control characters */
+ "\xef\xbf\x30\x13") /* misplaced continuation bytes followed by a digit and cc */
+ for (size_t cw = 0; cw < 22; cw++) {
+ _cleanup_free_ char *p, *q;
+ size_t ew;
+
+ p = utf8_escape_non_printable_full(s, cw, false);
+ ew = utf8_console_width(p);
+ log_debug("%02zu \"%s\" (%zu wasted)", cw, p, cw - ew);
+ assert_se(utf8_is_valid(p));
+ assert_se(ew <= cw);
+
+ q = utf8_escape_non_printable_full(s, cw, true);
+ ew = utf8_console_width(q);
+ log_debug(" \"%s\" (%zu wasted)", q, cw - ew);
+ assert_se(utf8_is_valid(q));
+ assert_se(ew <= cw);
+ if (cw > 0)
+ assert_se(endswith(q, "…"));
+ }
}
static void test_utf16_to_utf8(void) {