diff options
author | Shawn Landden <shawn@churchofgit.com> | 2013-09-21 03:37:33 +0200 |
---|---|---|
committer | Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> | 2013-10-13 23:56:54 +0200 |
commit | f405e86de361ec305dc2b8634efeaa23dc144053 (patch) | |
tree | b3fe37e4a143a5d2dee14eb34d96f96cb03b84fc /src/shared/gunicode.c | |
parent | udevadm.xml: document --resolve-names option for test (diff) | |
download | systemd-f405e86de361ec305dc2b8634efeaa23dc144053.tar.xz systemd-f405e86de361ec305dc2b8634efeaa23dc144053.zip |
util, utf8: make ellipsize take multi-byte characters into account
rename old versions to ascii_*
Do not take into account zerowidth characters, but do consider double-wide characters.
Import needed utf8 helper code from glib.
v3: rebase ontop of utf8 restructuring work
[zj: tweak the algorithm a bit, move new code to separate file]
Diffstat (limited to 'src/shared/gunicode.c')
-rw-r--r-- | src/shared/gunicode.c | 108 |
1 files changed, 108 insertions, 0 deletions
diff --git a/src/shared/gunicode.c b/src/shared/gunicode.c new file mode 100644 index 0000000000..f2d4608340 --- /dev/null +++ b/src/shared/gunicode.c @@ -0,0 +1,108 @@ +/* gunicode.c - Unicode manipulation functions + * + * Copyright (C) 1999, 2000 Tom Tromey + * Copyright 2000, 2005 Red Hat, Inc. + */ + +#include "gunicode.h" + +#define unichar uint32_t + +/** + * g_utf8_prev_char: + * @p: a pointer to a position within a UTF-8 encoded string + * + * Finds the previous UTF-8 character in the string before @p. + * + * @p does not have to be at the beginning of a UTF-8 character. No check + * is made to see if the character found is actually valid other than + * it starts with an appropriate byte. If @p might be the first + * character of the string, you must use g_utf8_find_prev_char() instead. + * + * Return value: a pointer to the found character. + **/ +char * +utf8_prev_char (const char *p) +{ + while (1) + { + p--; + if ((*p & 0xc0) != 0x80) + return (char *)p; + } +} + +struct Interval +{ + unichar start, end; +}; + +static int +interval_compare (const void *key, const void *elt) +{ + unichar c = (unichar) (long) (key); + struct Interval *interval = (struct Interval *)elt; + + if (c < interval->start) + return -1; + if (c > interval->end) + return +1; + + return 0; +} + +/* + * NOTE: + * + * The tables for g_unichar_iswide() and g_unichar_iswide_cjk() are + * generated from the Unicode Character Database's file + * extracted/DerivedEastAsianWidth.txt using the gen-iswide-table.py + * in this way: + * + * ./gen-iswide-table.py < path/to/ucd/extracted/DerivedEastAsianWidth.txt | fmt + * + * Last update for Unicode 6.0. + */ + +/** + * g_unichar_iswide: + * @c: a Unicode character + * + * Determines if a character is typically rendered in a double-width + * cell. + * + * Return value: %TRUE if the character is wide + **/ +bool +unichar_iswide (unichar c) +{ + /* See NOTE earlier for how to update this table. */ + static const struct Interval wide[] = { + {0x1100, 0x115F}, {0x2329, 0x232A}, {0x2E80, 0x2E99}, {0x2E9B, 0x2EF3}, + {0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB}, {0x3000, 0x303E}, {0x3041, 0x3096}, + {0x3099, 0x30FF}, {0x3105, 0x312D}, {0x3131, 0x318E}, {0x3190, 0x31BA}, + {0x31C0, 0x31E3}, {0x31F0, 0x321E}, {0x3220, 0x3247}, {0x3250, 0x32FE}, + {0x3300, 0x4DBF}, {0x4E00, 0xA48C}, {0xA490, 0xA4C6}, {0xA960, 0xA97C}, + {0xAC00, 0xD7A3}, {0xF900, 0xFAFF}, {0xFE10, 0xFE19}, {0xFE30, 0xFE52}, + {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFF01, 0xFF60}, {0xFFE0, 0xFFE6}, + {0x1B000, 0x1B001}, {0x1F200, 0x1F202}, {0x1F210, 0x1F23A}, {0x1F240, + 0x1F248}, {0x1F250, 0x1F251}, {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD} + }; + + if (bsearch ((void *)(uintptr_t)c, wide, (sizeof (wide) / sizeof ((wide)[0])), sizeof wide[0], + interval_compare)) + return true; + + return false; +} + +const char utf8_skip_data[256] = { + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1 +}; |