diff options
author | Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> | 2021-11-12 10:27:13 +0100 |
---|---|---|
committer | Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> | 2021-11-16 13:54:38 +0100 |
commit | 9b49a3b49e78fa46ab5da45e5a176bab3df5803a (patch) | |
tree | f227c2f1c8b7f3e785a8584df275e583e35eb333 /src/basic | |
parent | basic/utf8: inline some iterator variables (diff) | |
download | systemd-9b49a3b49e78fa46ab5da45e5a176bab3df5803a.tar.xz systemd-9b49a3b49e78fa46ab5da45e5a176bab3df5803a.zip |
basic/utf8: add function to convert to ASCII
The conversion must be lossy because ASCII doesn't have enough chars.
Diffstat (limited to 'src/basic')
-rw-r--r-- | src/basic/utf8.c | 31 | ||||
-rw-r--r-- | src/basic/utf8.h | 2 |
2 files changed, 33 insertions, 0 deletions
```diff
diff --git a/src/basic/utf8.c b/src/basic/utf8.c
index 2ad2151816..2532fcf81a 100644
--- a/src/basic/utf8.c
+++ b/src/basic/utf8.c
@@ -312,6 +312,37 @@ char *ascii_is_valid_n(const char *str, size_t len) {
         return (char*) str;
 }
 
+int utf8_to_ascii(const char *str, char replacement_char, char **ret) {
+        /* Convert to a string that has only ASCII chars, replacing anything that is not ASCII
+         * by replacement_char. */
+
+        _cleanup_free_ char *ans = new(char, strlen(str) + 1);
+        if (!ans)
+                return -ENOMEM;
+
+        char *q = ans;
+
+        for (const char *p = str; *p; q++) {
+                int l;
+
+                l = utf8_encoded_valid_unichar(p, SIZE_MAX);
+                if (l < 0) /* Non-UTF-8, let's not even try to propagate the garbage */
+                        return l;
+
+                if (l == 1)
+                        *q = *p;
+                else
+                        /* non-ASCII, we need to replace it */
+                        *q = replacement_char;
+
+                p += l;
+        }
+        *q = '\0';
+
+        *ret = TAKE_PTR(ans);
+        return 0;
+}
+
 /**
  * utf8_encode_unichar() - Encode single UCS-4 character as UTF-8
  * @out_utf8: output buffer of at least 4 bytes or NULL
diff --git a/src/basic/utf8.h b/src/basic/utf8.h
index b0e969f655..4a06dd62c5 100644
--- a/src/basic/utf8.h
+++ b/src/basic/utf8.h
@@ -21,6 +21,8 @@ static inline char *utf8_is_valid(const char *s) {
 char *ascii_is_valid(const char *s) _pure_;
 char *ascii_is_valid_n(const char *str, size_t len);
 
+int utf8_to_ascii(const char *str, char replacement_char, char **ret);
+
 bool utf8_is_printable_newline(const char* str, size_t length, bool allow_newline) _pure_;
 #define utf8_is_printable(str, length) utf8_is_printable_newline(str, length, true)
 
```