summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- fs/udf/unicode.c 278
1 files changed, 90 insertions, 188 deletions
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 47e61883275d..4d7a674ebce5 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -76,151 +76,72 @@ static void udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize)
memcpy(dest->u_name, ptr + 1, exactsize - 1);
}
-/*
- * udf_CS0toUTF8
- *
- * PURPOSE
- * Convert OSTA Compressed Unicode to the UTF-8 equivalent.
- *
- * PRE-CONDITIONS
- * utf Pointer to UTF-8 output buffer.
- * ocu Pointer to OSTA Compressed Unicode input buffer
- * of size UDF_NAME_LEN bytes.
- * both of type "struct ustr *"
- *
- * POST-CONDITIONS
- * <return> >= 0 on success.
- *
- * HISTORY
- * November 12, 1997 - Andrew E. Mileski
- * Written, tested, and released.
- */
-int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i)
+static int udf_uni2char_utf8(wchar_t uni,
+ unsigned char *out,
+ int boundlen)
{
- const uint8_t *ocu;
- uint8_t cmp_id, ocu_len;
- int i;
-
- ocu_len = ocu_i->u_len;
- if (ocu_len == 0) {
- memset(utf_o, 0, sizeof(struct ustr));
- return 0;
- }
-
- cmp_id = ocu_i->u_cmpID;
- if (cmp_id != 8 && cmp_id != 16) {
- memset(utf_o, 0, sizeof(struct ustr));
- pr_err("unknown compression code (%d) stri=%s\n",
- cmp_id, ocu_i->u_name);
- return -EINVAL;
- }
-
- ocu = ocu_i->u_name;
- utf_o->u_len = 0;
- for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) {
-
- /* Expand OSTA compressed Unicode to Unicode */
- uint32_t c = ocu[i++];
- if (cmp_id == 16)
- c = (c << 8) | ocu[i++];
-
- /* Compress Unicode to UTF-8 */
- if (c < 0x80U)
- utf_o->u_name[utf_o->u_len++] = (uint8_t)c;
- else if (c < 0x800U) {
- if (utf_o->u_len > (UDF_NAME_LEN - 4))
- break;
- utf_o->u_name[utf_o->u_len++] =
- (uint8_t)(0xc0 | (c >> 6));
- utf_o->u_name[utf_o->u_len++] =
- (uint8_t)(0x80 | (c & 0x3f));
- } else {
- if (utf_o->u_len > (UDF_NAME_LEN - 5))
- break;
- utf_o->u_name[utf_o->u_len++] =
- (uint8_t)(0xe0 | (c >> 12));
- utf_o->u_name[utf_o->u_len++] =
- (uint8_t)(0x80 |
- ((c >> 6) & 0x3f));
- utf_o->u_name[utf_o->u_len++] =
- (uint8_t)(0x80 | (c & 0x3f));
- }
+ int u_len = 0;
+
+ if (boundlen <= 0)
+ return -ENAMETOOLONG;
+
+ if (uni < 0x80) {
+ out[u_len++] = (unsigned char)uni;
+ } else if (uni < 0x800) {
+ if (boundlen < 2)
+ return -ENAMETOOLONG;
+ out[u_len++] = (unsigned char)(0xc0 | (uni >> 6));
+ out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
+ } else {
+ if (boundlen < 3)
+ return -ENAMETOOLONG;
+ out[u_len++] = (unsigned char)(0xe0 | (uni >> 12));
+ out[u_len++] = (unsigned char)(0x80 | ((uni >> 6) & 0x3f));
+ out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
}
- utf_o->u_cmpID = 8;
-
- return utf_o->u_len;
+ return u_len;
}
-/*
- *
- * udf_UTF8toCS0
- *
- * PURPOSE
- * Convert UTF-8 to the OSTA Compressed Unicode equivalent.
- *
- * DESCRIPTION
- * This routine is only called by udf_lookup().
- *
- * PRE-CONDITIONS
- * ocu Pointer to OSTA Compressed Unicode output
- * buffer of size UDF_NAME_LEN bytes.
- * utf Pointer to UTF-8 input buffer.
- * utf_len Length of UTF-8 input buffer in bytes.
- *
- * POST-CONDITIONS
- * <return> Zero on success.
- *
- * HISTORY
- * November 12, 1997 - Andrew E. Mileski
- * Written, tested, and released.
- */
-static int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length)
+static int udf_char2uni_utf8(const unsigned char *in,
+ int boundlen,
+ wchar_t *uni)
{
- unsigned c, i, max_val, utf_char;
- int utf_cnt, u_len, u_ch;
+ unsigned int utf_char;
+ unsigned char c;
+ int utf_cnt, u_len;
- memset(ocu, 0, sizeof(dstring) * length);
- ocu[0] = 8;
- max_val = 0xffU;
- u_ch = 1;
-
-try_again:
- u_len = 0U;
- utf_char = 0U;
- utf_cnt = 0U;
- for (i = 0U; i < utf->u_len; i++) {
- /* Name didn't fit? */
- if (u_len + 1 + u_ch >= length)
- return 0;
-
- c = (uint8_t)utf->u_name[i];
+ utf_char = 0;
+ utf_cnt = 0;
+ for (u_len = 0; u_len < boundlen;) {
+ c = in[u_len++];
/* Complete a multi-byte UTF-8 character */
if (utf_cnt) {
- utf_char = (utf_char << 6) | (c & 0x3fU);
+ utf_char = (utf_char << 6) | (c & 0x3f);
if (--utf_cnt)
continue;
} else {
/* Check for a multi-byte UTF-8 character */
- if (c & 0x80U) {
+ if (c & 0x80) {
/* Start a multi-byte UTF-8 character */
- if ((c & 0xe0U) == 0xc0U) {
- utf_char = c & 0x1fU;
+ if ((c & 0xe0) == 0xc0) {
+ utf_char = c & 0x1f;
utf_cnt = 1;
- } else if ((c & 0xf0U) == 0xe0U) {
- utf_char = c & 0x0fU;
+ } else if ((c & 0xf0) == 0xe0) {
+ utf_char = c & 0x0f;
utf_cnt = 2;
- } else if ((c & 0xf8U) == 0xf0U) {
- utf_char = c & 0x07U;
+ } else if ((c & 0xf8) == 0xf0) {
+ utf_char = c & 0x07;
utf_cnt = 3;
- } else if ((c & 0xfcU) == 0xf8U) {
- utf_char = c & 0x03U;
+ } else if ((c & 0xfc) == 0xf8) {
+ utf_char = c & 0x03;
utf_cnt = 4;
- } else if ((c & 0xfeU) == 0xfcU) {
- utf_char = c & 0x01U;
+ } else if ((c & 0xfe) == 0xfc) {
+ utf_char = c & 0x01;
utf_cnt = 5;
} else {
- goto error_out;
+ utf_cnt = -1;
+ break;
}
continue;
} else {
@@ -228,36 +149,19 @@ try_again:
utf_char = c;
}
}
-
- /* Choose no compression if necessary */
- if (utf_char > max_val) {
- if (max_val == 0xffU) {
- max_val = 0xffffU;
- ocu[0] = (uint8_t)0x10U;
- u_ch = 2;
- goto try_again;
- }
- goto error_out;
- }
-
- if (max_val == 0xffffU)
- ocu[++u_len] = (uint8_t)(utf_char >> 8);
- ocu[++u_len] = (uint8_t)(utf_char & 0xffU);
+ *uni = utf_char;
+ break;
}
-
if (utf_cnt) {
-error_out:
- ocu[++u_len] = '?';
- printk(KERN_DEBUG pr_fmt("bad UTF-8 character\n"));
+ *uni = '?';
+ return -EINVAL;
}
-
- ocu[length - 1] = (uint8_t)u_len + 1;
-
- return u_len + 1;
+ return u_len;
}
-static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
- const struct ustr *ocu_i)
+static int udf_name_from_CS0(struct ustr *utf_o,
+ const struct ustr *ocu_i,
+ int (*conv_f)(wchar_t, unsigned char *, int))
{
const uint8_t *ocu;
uint8_t cmp_id, ocu_len;
@@ -286,11 +190,13 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
if (cmp_id == 16)
c = (c << 8) | ocu[i++];
- len = nls->uni2char(c, &utf_o->u_name[utf_o->u_len],
- UDF_NAME_LEN - 2 - utf_o->u_len);
+ len = conv_f(c, &utf_o->u_name[utf_o->u_len],
+ UDF_NAME_LEN - 2 - utf_o->u_len);
/* Valid character? */
if (len >= 0)
utf_o->u_len += len;
+ else if (len == -ENAMETOOLONG)
+ break;
else
utf_o->u_name[utf_o->u_len++] = '?';
}
@@ -299,26 +205,26 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
return utf_o->u_len;
}
-static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni,
- int length)
+static int udf_name_to_CS0(dstring *ocu, struct ustr *uni, int length,
+ int (*conv_f)(const unsigned char *, int, wchar_t *))
{
- int len;
- unsigned i, max_val;
- uint16_t uni_char;
+ int i, len;
+ unsigned int max_val;
+ wchar_t uni_char;
int u_len, u_ch;
memset(ocu, 0, sizeof(dstring) * length);
ocu[0] = 8;
- max_val = 0xffU;
+ max_val = 0xff;
u_ch = 1;
try_again:
- u_len = 0U;
- for (i = 0U; i < uni->u_len; i++) {
+ u_len = 0;
+ for (i = 0; i < uni->u_len; i++) {
/* Name didn't fit? */
if (u_len + 1 + u_ch >= length)
return 0;
- len = nls->char2uni(&uni->u_name[i], uni->u_len - i, &uni_char);
+ len = conv_f(&uni->u_name[i], uni->u_len - i, &uni_char);
if (!len)
continue;
/* Invalid character, deal with it */
@@ -328,15 +234,15 @@ try_again:
}
if (uni_char > max_val) {
- max_val = 0xffffU;
- ocu[0] = (uint8_t)0x10U;
+ max_val = 0xffff;
+ ocu[0] = 0x10;
u_ch = 2;
goto try_again;
}
- if (max_val == 0xffffU)
+ if (max_val == 0xffff)
ocu[++u_len] = (uint8_t)(uni_char >> 8);
- ocu[++u_len] = (uint8_t)(uni_char & 0xffU);
+ ocu[++u_len] = (uint8_t)(uni_char & 0xff);
i += len - 1;
}
@@ -344,10 +250,16 @@ try_again:
return u_len + 1;
}
+int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i)
+{
+ return udf_name_from_CS0(utf_o, ocu_i, udf_uni2char_utf8);
+}
+
int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen,
uint8_t *dname, int dlen)
{
struct ustr *filename, *unifilename;
+ int (*conv_f)(wchar_t, unsigned char *, int);
int ret;
if (!slen)
@@ -365,23 +277,18 @@ int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen,
udf_build_ustr_exact(unifilename, sname, slen);
if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
- ret = udf_CS0toUTF8(filename, unifilename);
- if (ret < 0) {
- udf_debug("Failed in udf_get_filename: sname = %s\n",
- sname);
- goto out2;
- }
+ conv_f = udf_uni2char_utf8;
} else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
- ret = udf_CS0toNLS(UDF_SB(sb)->s_nls_map, filename,
- unifilename);
- if (ret < 0) {
- udf_debug("Failed in udf_get_filename: sname = %s\n",
- sname);
- goto out2;
- }
+ conv_f = UDF_SB(sb)->s_nls_map->uni2char;
} else
BUG();
+ ret = udf_name_from_CS0(filename, unifilename, conv_f);
+ if (ret < 0) {
+ udf_debug("Failed in udf_get_filename: sname = %s\n", sname);
+ goto out2;
+ }
+
ret = udf_translate_to_linux(dname, dlen,
filename->u_name, filename->u_len,
unifilename->u_name, unifilename->u_len);
@@ -399,24 +306,19 @@ int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen,
uint8_t *dname, int dlen)
{
struct ustr unifilename;
- int namelen;
+ int (*conv_f)(const unsigned char *, int, wchar_t *);
if (!udf_char_to_ustr(&unifilename, sname, slen))
return 0;
if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
- namelen = udf_UTF8toCS0(dname, &unifilename, dlen);
- if (!namelen)
- return 0;
+ conv_f = udf_char2uni_utf8;
} else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
- namelen = udf_NLStoCS0(UDF_SB(sb)->s_nls_map, dname,
- &unifilename, dlen);
- if (!namelen)
- return 0;
+ conv_f = UDF_SB(sb)->s_nls_map->char2uni;
} else
- return 0;
+ BUG();
- return namelen;
+ return udf_name_to_CS0(dname, &unifilename, dlen, conv_f);
}
#define ILLEGAL_CHAR_MARK '_'