diff options
author | Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> | 2021-11-12 10:27:13 +0100 |
---|---|---|
committer | Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> | 2021-11-16 13:54:38 +0100 |
commit | 9b49a3b49e78fa46ab5da45e5a176bab3df5803a (patch) | |
tree | f227c2f1c8b7f3e785a8584df275e583e35eb333 /src/basic/utf8.c | |
parent | basic/utf8: inline some iterator variables (diff) | |
download | systemd-9b49a3b49e78fa46ab5da45e5a176bab3df5803a.tar.xz systemd-9b49a3b49e78fa46ab5da45e5a176bab3df5803a.zip |
basic/utf8: add function to convert to ASCII
The conversion must be lossy because ASCII doesn't have enough chars.
Diffstat (limited to 'src/basic/utf8.c')
-rw-r--r-- | src/basic/utf8.c | 31 |
1 file changed, 31 insertions, 0 deletions
diff --git a/src/basic/utf8.c b/src/basic/utf8.c
index 2ad2151816..2532fcf81a 100644
--- a/src/basic/utf8.c
+++ b/src/basic/utf8.c
@@ -312,6 +312,37 @@ char *ascii_is_valid_n(const char *str, size_t len) {
         return (char*) str;
 }
 
+int utf8_to_ascii(const char *str, char replacement_char, char **ret) {
+        /* Convert to a string that has only ASCII chars, replacing anything that is not ASCII
+         * by replacement_char. */
+
+        _cleanup_free_ char *ans = new(char, strlen(str) + 1);
+        if (!ans)
+                return -ENOMEM;
+
+        char *q = ans;
+
+        for (const char *p = str; *p; q++) {
+                int l;
+
+                l = utf8_encoded_valid_unichar(p, SIZE_MAX);
+                if (l < 0) /* Non-UTF-8, let's not even try to propagate the garbage */
+                        return l;
+
+                if (l == 1)
+                        *q = *p;
+                else
+                        /* non-ASCII, we need to replace it */
+                        *q = replacement_char;
+
+                p += l;
+        }
+        *q = '\0';
+
+        *ret = TAKE_PTR(ans);
+        return 0;
+}
+
 /**
  * utf8_encode_unichar() - Encode single UCS-4 character as UTF-8
  * @out_utf8: output buffer of at least 4 bytes or NULL