diff options
-rw-r--r-- | fs/udf/unicode.c | 278 |
1 files changed, 90 insertions, 188 deletions
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index 47e61883275d..4d7a674ebce5 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -76,151 +76,72 @@ static void udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize) memcpy(dest->u_name, ptr + 1, exactsize - 1); } -/* - * udf_CS0toUTF8 - * - * PURPOSE - * Convert OSTA Compressed Unicode to the UTF-8 equivalent. - * - * PRE-CONDITIONS - * utf Pointer to UTF-8 output buffer. - * ocu Pointer to OSTA Compressed Unicode input buffer - * of size UDF_NAME_LEN bytes. - * both of type "struct ustr *" - * - * POST-CONDITIONS - * <return> >= 0 on success. - * - * HISTORY - * November 12, 1997 - Andrew E. Mileski - * Written, tested, and released. - */ -int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i) +static int udf_uni2char_utf8(wchar_t uni, + unsigned char *out, + int boundlen) { - const uint8_t *ocu; - uint8_t cmp_id, ocu_len; - int i; - - ocu_len = ocu_i->u_len; - if (ocu_len == 0) { - memset(utf_o, 0, sizeof(struct ustr)); - return 0; - } - - cmp_id = ocu_i->u_cmpID; - if (cmp_id != 8 && cmp_id != 16) { - memset(utf_o, 0, sizeof(struct ustr)); - pr_err("unknown compression code (%d) stri=%s\n", - cmp_id, ocu_i->u_name); - return -EINVAL; - } - - ocu = ocu_i->u_name; - utf_o->u_len = 0; - for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) { - - /* Expand OSTA compressed Unicode to Unicode */ - uint32_t c = ocu[i++]; - if (cmp_id == 16) - c = (c << 8) | ocu[i++]; - - /* Compress Unicode to UTF-8 */ - if (c < 0x80U) - utf_o->u_name[utf_o->u_len++] = (uint8_t)c; - else if (c < 0x800U) { - if (utf_o->u_len > (UDF_NAME_LEN - 4)) - break; - utf_o->u_name[utf_o->u_len++] = - (uint8_t)(0xc0 | (c >> 6)); - utf_o->u_name[utf_o->u_len++] = - (uint8_t)(0x80 | (c & 0x3f)); - } else { - if (utf_o->u_len > (UDF_NAME_LEN - 5)) - break; - utf_o->u_name[utf_o->u_len++] = - (uint8_t)(0xe0 | (c >> 12)); - utf_o->u_name[utf_o->u_len++] = - (uint8_t)(0x80 | - ((c >> 6) & 0x3f)); - utf_o->u_name[utf_o->u_len++] = - (uint8_t)(0x80 | (c & 0x3f)); - } + int u_len = 0; + + if (boundlen <= 0) + return -ENAMETOOLONG; + + if (uni < 0x80) { + out[u_len++] = (unsigned char)uni; + } else if (uni < 0x800) { + if (boundlen < 2) + return -ENAMETOOLONG; + out[u_len++] = (unsigned char)(0xc0 | (uni >> 6)); + out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f)); + } else { + if (boundlen < 3) + return -ENAMETOOLONG; + out[u_len++] = (unsigned char)(0xe0 | (uni >> 12)); + out[u_len++] = (unsigned char)(0x80 | ((uni >> 6) & 0x3f)); + out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f)); } - utf_o->u_cmpID = 8; - - return utf_o->u_len; + return u_len; } -/* - * - * udf_UTF8toCS0 - * - * PURPOSE - * Convert UTF-8 to the OSTA Compressed Unicode equivalent. - * - * DESCRIPTION - * This routine is only called by udf_lookup(). - * - * PRE-CONDITIONS - * ocu Pointer to OSTA Compressed Unicode output - * buffer of size UDF_NAME_LEN bytes. - * utf Pointer to UTF-8 input buffer. - * utf_len Length of UTF-8 input buffer in bytes. - * - * POST-CONDITIONS - * <return> Zero on success. - * - * HISTORY - * November 12, 1997 - Andrew E. Mileski - * Written, tested, and released. - */ -static int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length) +static int udf_char2uni_utf8(const unsigned char *in, + int boundlen, + wchar_t *uni) { - unsigned c, i, max_val, utf_char; - int utf_cnt, u_len, u_ch; + unsigned int utf_char; + unsigned char c; + int utf_cnt, u_len; - memset(ocu, 0, sizeof(dstring) * length); - ocu[0] = 8; - max_val = 0xffU; - u_ch = 1; - -try_again: - u_len = 0U; - utf_char = 0U; - utf_cnt = 0U; - for (i = 0U; i < utf->u_len; i++) { - /* Name didn't fit? */ - if (u_len + 1 + u_ch >= length) - return 0; - - c = (uint8_t)utf->u_name[i]; + utf_char = 0; + utf_cnt = 0; + for (u_len = 0; u_len < boundlen;) { + c = in[u_len++]; /* Complete a multi-byte UTF-8 character */ if (utf_cnt) { - utf_char = (utf_char << 6) | (c & 0x3fU); + utf_char = (utf_char << 6) | (c & 0x3f); if (--utf_cnt) continue; } else { /* Check for a multi-byte UTF-8 character */ - if (c & 0x80U) { + if (c & 0x80) { /* Start a multi-byte UTF-8 character */ - if ((c & 0xe0U) == 0xc0U) { - utf_char = c & 0x1fU; + if ((c & 0xe0) == 0xc0) { + utf_char = c & 0x1f; utf_cnt = 1; - } else if ((c & 0xf0U) == 0xe0U) { - utf_char = c & 0x0fU; + } else if ((c & 0xf0) == 0xe0) { + utf_char = c & 0x0f; utf_cnt = 2; - } else if ((c & 0xf8U) == 0xf0U) { - utf_char = c & 0x07U; + } else if ((c & 0xf8) == 0xf0) { + utf_char = c & 0x07; utf_cnt = 3; - } else if ((c & 0xfcU) == 0xf8U) { - utf_char = c & 0x03U; + } else if ((c & 0xfc) == 0xf8) { + utf_char = c & 0x03; utf_cnt = 4; - } else if ((c & 0xfeU) == 0xfcU) { - utf_char = c & 0x01U; + } else if ((c & 0xfe) == 0xfc) { + utf_char = c & 0x01; utf_cnt = 5; } else { - goto error_out; + utf_cnt = -1; + break; } continue; } else { @@ -228,36 +149,19 @@ try_again: utf_char = c; } } - - /* Choose no compression if necessary */ - if (utf_char > max_val) { - if (max_val == 0xffU) { - max_val = 0xffffU; - ocu[0] = (uint8_t)0x10U; - u_ch = 2; - goto try_again; - } - goto error_out; - } - - if (max_val == 0xffffU) - ocu[++u_len] = (uint8_t)(utf_char >> 8); - ocu[++u_len] = (uint8_t)(utf_char & 0xffU); + *uni = utf_char; + break; } - if (utf_cnt) { -error_out: - ocu[++u_len] = '?'; - printk(KERN_DEBUG pr_fmt("bad UTF-8 character\n")); + *uni = '?'; + return -EINVAL; } - - ocu[length - 1] = (uint8_t)u_len + 1; - - return u_len + 1; + return u_len; } -static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, - const struct ustr *ocu_i) +static int udf_name_from_CS0(struct ustr *utf_o, + const struct ustr *ocu_i, + int (*conv_f)(wchar_t, unsigned char *, int)) { const uint8_t *ocu; uint8_t cmp_id, ocu_len; @@ -286,11 +190,13 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, if (cmp_id == 16) c = (c << 8) | ocu[i++]; - len = nls->uni2char(c, &utf_o->u_name[utf_o->u_len], - UDF_NAME_LEN - 2 - utf_o->u_len); + len = conv_f(c, &utf_o->u_name[utf_o->u_len], + UDF_NAME_LEN - 2 - utf_o->u_len); /* Valid character? */ if (len >= 0) utf_o->u_len += len; + else if (len == -ENAMETOOLONG) + break; else utf_o->u_name[utf_o->u_len++] = '?'; } @@ -299,26 +205,26 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, return utf_o->u_len; } -static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni, - int length) +static int udf_name_to_CS0(dstring *ocu, struct ustr *uni, int length, + int (*conv_f)(const unsigned char *, int, wchar_t *)) { - int len; - unsigned i, max_val; - uint16_t uni_char; + int i, len; + unsigned int max_val; + wchar_t uni_char; int u_len, u_ch; memset(ocu, 0, sizeof(dstring) * length); ocu[0] = 8; - max_val = 0xffU; + max_val = 0xff; u_ch = 1; try_again: - u_len = 0U; - for (i = 0U; i < uni->u_len; i++) { + u_len = 0; + for (i = 0; i < uni->u_len; i++) { /* Name didn't fit? */ if (u_len + 1 + u_ch >= length) return 0; - len = nls->char2uni(&uni->u_name[i], uni->u_len - i, &uni_char); + len = conv_f(&uni->u_name[i], uni->u_len - i, &uni_char); if (!len) continue; /* Invalid character, deal with it */ @@ -328,15 +234,15 @@ try_again: } if (uni_char > max_val) { - max_val = 0xffffU; - ocu[0] = (uint8_t)0x10U; + max_val = 0xffff; + ocu[0] = 0x10; u_ch = 2; goto try_again; } - if (max_val == 0xffffU) + if (max_val == 0xffff) ocu[++u_len] = (uint8_t)(uni_char >> 8); - ocu[++u_len] = (uint8_t)(uni_char & 0xffU); + ocu[++u_len] = (uint8_t)(uni_char & 0xff); i += len - 1; } @@ -344,10 +250,16 @@ try_again: return u_len + 1; } +int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i) +{ + return udf_name_from_CS0(utf_o, ocu_i, udf_uni2char_utf8); +} + int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen, uint8_t *dname, int dlen) { struct ustr *filename, *unifilename; + int (*conv_f)(wchar_t, unsigned char *, int); int ret; if (!slen) @@ -365,23 +277,18 @@ int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen, udf_build_ustr_exact(unifilename, sname, slen); if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { - ret = udf_CS0toUTF8(filename, unifilename); - if (ret < 0) { - udf_debug("Failed in udf_get_filename: sname = %s\n", - sname); - goto out2; - } + conv_f = udf_uni2char_utf8; } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { - ret = udf_CS0toNLS(UDF_SB(sb)->s_nls_map, filename, - unifilename); - if (ret < 0) { - udf_debug("Failed in udf_get_filename: sname = %s\n", - sname); - goto out2; - } + conv_f = UDF_SB(sb)->s_nls_map->uni2char; } else BUG(); + ret = udf_name_from_CS0(filename, unifilename, conv_f); + if (ret < 0) { + udf_debug("Failed in udf_get_filename: sname = %s\n", sname); + goto out2; + } + ret = udf_translate_to_linux(dname, dlen, filename->u_name, filename->u_len, unifilename->u_name, unifilename->u_len); @@ -399,24 +306,19 @@ int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen, uint8_t *dname, int dlen) { struct ustr unifilename; - int namelen; + int (*conv_f)(const unsigned char *, int, wchar_t *); if (!udf_char_to_ustr(&unifilename, sname, slen)) return 0; if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { - namelen = udf_UTF8toCS0(dname, &unifilename, dlen); - if (!namelen) - return 0; + conv_f = udf_char2uni_utf8; } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { - namelen = udf_NLStoCS0(UDF_SB(sb)->s_nls_map, dname, - &unifilename, dlen); - if (!namelen) - return 0; + conv_f = UDF_SB(sb)->s_nls_map->char2uni; } else - return 0; + BUG(); - return namelen; + return udf_name_to_CS0(dname, &unifilename, dlen, conv_f); } #define ILLEGAL_CHAR_MARK '_' |