diff options
author | Christoph Hellwig <hch@lst.de> | 2021-09-15 09:00:04 +0200 |
---|---|---|
committer | Gabriel Krisman Bertazi <krisman@collabora.com> | 2021-10-11 22:02:02 +0200 |
commit | 6ca99ce756c27852d1ea1e555045de1c920f30ed (patch) | |
tree | 3cedb519da25bfe7e8c3cce48f03c47599ce953e /fs/unicode/utf8-norm.c | |
parent | unicode: move utf8cursor to utf8-selftest.c (diff) | |
download | linux-6ca99ce756c27852d1ea1e555045de1c920f30ed.tar.xz linux-6ca99ce756c27852d1ea1e555045de1c920f30ed.zip |
unicode: cache the normalization tables in struct unicode_map
Instead of repeatedly looking up the version add pointers to the
NFD and NFD+CF tables to struct unicode_map, and pass a
unicode_map plus index to the functions using the normalization
tables.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Diffstat (limited to 'fs/unicode/utf8-norm.c')
-rw-r--r-- | fs/unicode/utf8-norm.c | 45 |
1 files changed, 20 insertions, 25 deletions
diff --git a/fs/unicode/utf8-norm.c b/fs/unicode/utf8-norm.c index 1ac90fa00070..7c1f28ab31a8 100644 --- a/fs/unicode/utf8-norm.c +++ b/fs/unicode/utf8-norm.c @@ -309,21 +309,19 @@ utf8hangul(const char *str, unsigned char *hangul) * is well-formed and corresponds to a known unicode code point. The * shorthand for this will be "is valid UTF-8 unicode". */ -static utf8leaf_t *utf8nlookup(const struct utf8data *data, - unsigned char *hangul, const char *s, size_t len) +static utf8leaf_t *utf8nlookup(const struct unicode_map *um, + enum utf8_normalization n, unsigned char *hangul, const char *s, + size_t len) { - utf8trie_t *trie = NULL; + utf8trie_t *trie = utf8data + um->ntab[n]->offset; int offlen; int offset; int mask; int node; - if (!data) - return NULL; if (len == 0) return NULL; - trie = utf8data + data->offset; node = 1; while (node) { offlen = (*trie & OFFLEN) >> OFFLEN_SHIFT; @@ -385,29 +383,28 @@ static utf8leaf_t *utf8nlookup(const struct utf8data *data, * * Forwards to utf8nlookup(). */ -static utf8leaf_t *utf8lookup(const struct utf8data *data, - unsigned char *hangul, const char *s) +static utf8leaf_t *utf8lookup(const struct unicode_map *um, + enum utf8_normalization n, unsigned char *hangul, const char *s) { - return utf8nlookup(data, hangul, s, (size_t)-1); + return utf8nlookup(um, n, hangul, s, (size_t)-1); } /* * Length of the normalization of s, touch at most len bytes. * Return -1 if s is not valid UTF-8 unicode. */ -ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len) +ssize_t utf8nlen(const struct unicode_map *um, enum utf8_normalization n, + const char *s, size_t len) { utf8leaf_t *leaf; size_t ret = 0; unsigned char hangul[UTF8HANGULLEAF]; - if (!data) - return -1; while (len && *s) { - leaf = utf8nlookup(data, hangul, s, len); + leaf = utf8nlookup(um, n, hangul, s, len); if (!leaf) return -1; - if (utf8agetab[LEAF_GEN(leaf)] > data->maxage) + if (utf8agetab[LEAF_GEN(leaf)] > um->ntab[n]->maxage) ret += utf8clen(s); else if (LEAF_CCC(leaf) == DECOMPOSE) ret += strlen(LEAF_STR(leaf)); @@ -430,14 +427,13 @@ EXPORT_SYMBOL(utf8nlen); * * Returns -1 on error, 0 on success. */ -int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data, - const char *s, size_t len) +int utf8ncursor(struct utf8cursor *u8c, const struct unicode_map *um, + enum utf8_normalization n, const char *s, size_t len) { - if (!data) - return -1; if (!s) return -1; - u8c->data = data; + u8c->um = um; + u8c->n = n; u8c->s = s; u8c->p = NULL; u8c->ss = NULL; @@ -512,9 +508,9 @@ int utf8byte(struct utf8cursor *u8c) /* Look up the data for the current character. */ if (u8c->p) { - leaf = utf8lookup(u8c->data, u8c->hangul, u8c->s); + leaf = utf8lookup(u8c->um, u8c->n, u8c->hangul, u8c->s); } else { - leaf = utf8nlookup(u8c->data, u8c->hangul, + leaf = utf8nlookup(u8c->um, u8c->n, u8c->hangul, u8c->s, u8c->len); } @@ -524,7 +520,8 @@ int utf8byte(struct utf8cursor *u8c) ccc = LEAF_CCC(leaf); /* Characters that are too new have CCC 0. */ - if (utf8agetab[LEAF_GEN(leaf)] > u8c->data->maxage) { + if (utf8agetab[LEAF_GEN(leaf)] > + u8c->um->ntab[u8c->n]->maxage) { ccc = STOPPER; } else if (ccc == DECOMPOSE) { u8c->len -= utf8clen(u8c->s); @@ -538,7 +535,7 @@ int utf8byte(struct utf8cursor *u8c) goto ccc_mismatch; } - leaf = utf8lookup(u8c->data, u8c->hangul, u8c->s); + leaf = utf8lookup(u8c->um, u8c->n, u8c->hangul, u8c->s); if (!leaf) return -1; ccc = LEAF_CCC(leaf); @@ -611,7 +608,6 @@ const struct utf8data *utf8nfdi(unsigned int maxage) return NULL; return &utf8nfdidata[i]; } -EXPORT_SYMBOL(utf8nfdi); const struct utf8data *utf8nfdicf(unsigned int maxage) { @@ -623,4 +619,3 @@ const struct utf8data *utf8nfdicf(unsigned int maxage) return NULL; return &utf8nfdicfdata[i]; } -EXPORT_SYMBOL(utf8nfdicf); |