summaryrefslogtreecommitdiffstats
path: root/jnlib
diff options
context:
space:
mode:
authorWerner Koch <wk@gnupg.org>2006-09-22 20:15:18 +0200
committerWerner Koch <wk@gnupg.org>2006-09-22 20:15:18 +0200
commit2db8df0ba388f0a39424b76ce00fe5b51abbd54d (patch)
tree20ec3eea82409a6329e241b92c9c16ef07d07456 /jnlib
parentImproved yat2m. (diff)
downloadgnupg2-2db8df0ba388f0a39424b76ce00fe5b51abbd54d.tar.xz
gnupg2-2db8df0ba388f0a39424b76ce00fe5b51abbd54d.zip
Added iconv support and doc cleanups.
Diffstat (limited to 'jnlib')
-rw-r--r--jnlib/ChangeLog6
-rw-r--r--jnlib/utf8conv.c572
2 files changed, 383 insertions, 195 deletions
diff --git a/jnlib/ChangeLog b/jnlib/ChangeLog
index f601d9b14..6c38de2b1 100644
--- a/jnlib/ChangeLog
+++ b/jnlib/ChangeLog
@@ -1,3 +1,9 @@
+2006-09-22 Werner Koch <wk@g10code.com>
+
+ * utf8conv.c: Reworked to match the gnupg 1.4.5 code. This now
+ requires iconv support but this is reasonable for all modern
+ systems.
+
2006-08-29 Werner Koch <wk@g10code.com>
* logging.c (do_logv): Emit a missing LF for fatal errors.
diff --git a/jnlib/utf8conv.c b/jnlib/utf8conv.c
index 9fba1ed4f..ebb6ef3fd 100644
--- a/jnlib/utf8conv.c
+++ b/jnlib/utf8conv.c
@@ -28,101 +28,225 @@
#ifdef HAVE_LANGINFO_CODESET
#include <langinfo.h>
#endif
+#include <errno.h>
+#include <iconv.h>
#include "libjnlib-config.h"
#include "stringhelp.h"
#include "utf8conv.h"
+#ifndef MB_LEN_MAX
+#define MB_LEN_MAX 16
+#endif
+
+static const char *active_charset_name = "iso-8859-1"; /* Name of the native charset currently in use. */
+static unsigned short *active_charset; /* Lookup table pointer; only ever set to NULL by the code added here. */
+static int no_translation; /* Set to true if we simply pass strings through (native charset is utf-8). */
+static int use_iconv; /* Set to true if the iconv conversion functions are required. */
-static ushort koi8_unicode[128] = {
- 0x2500, 0x2502, 0x250c, 0x2510, 0x2514, 0x2518, 0x251c, 0x2524,
- 0x252c, 0x2534, 0x253c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590,
- 0x2591, 0x2592, 0x2593, 0x2320, 0x25a0, 0x2219, 0x221a, 0x2248,
- 0x2264, 0x2265, 0x00a0, 0x2321, 0x00b0, 0x00b2, 0x00b7, 0x00f7,
- 0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556,
- 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 0x255e,
- 0x255f, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565,
- 0x2566, 0x2567, 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x00a9,
- 0x044e, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
- 0x0445, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e,
- 0x043f, 0x044f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
- 0x044c, 0x044b, 0x0437, 0x0448, 0x044d, 0x0449, 0x0447, 0x044a,
- 0x042e, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
- 0x0425, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e,
- 0x041f, 0x042f, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
- 0x042c, 0x042b, 0x0417, 0x0428, 0x042d, 0x0429, 0x0427, 0x042a
-};
-
-static ushort latin2_unicode[128] = {
- 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
- 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
- 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
- 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
- 0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,
- 0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,
- 0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,
- 0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,
- 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
- 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
- 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
- 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
- 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
- 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
- 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
- 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9
-};
-static const char *active_charset_name = "iso-8859-1";
-static ushort *active_charset = NULL;
-static int no_translation = 0;
+/* Error handler for iconv failures. This is needed to not clutter the
+ output with repeated diagnostics about a missing conversion.
+
+ TO and FROM are the charset names of the conversion that could not
+ be set up; they are only used for the diagnostic. The function
+ reads ERRNO, so it has to be called while ERRNO still holds the
+ value of the failed call. For EINVAL a "conversion not available"
+ message is logged, at most once for conversions to "utf-8" and at
+ most once for all other targets. For any other error a single
+ "iconv_open failed" message with the strerror text is logged.
+
+ If USE_FALLBACK is true the module state is reset to Latin-1
+ without iconv, so that later conversions do not fail again. */
+static void
+handle_iconv_error (const char *to, const char *from, int use_fallback)
+{
+ if (errno == EINVAL)
+ {
+ static int shown1, shown2;
+ int x;
+
+ if (to && !strcmp (to, "utf-8"))
+ {
+ x = shown1;
+ shown1 = 1;
+ }
+ else
+ {
+ x = shown2;
+ shown2 = 1;
+ }
+
+ if (!x)
+ log_info (_("conversion from `%s' to `%s' not available\n"),
+ from, to);
+ }
+ else
+ {
+ static int shown;
+
+ if (!shown)
+ log_info (_("iconv_open failed: %s\n"), strerror (errno));
+ shown = 1;
+ }
+
+ if (use_fallback)
+ {
+ /* To avoid further error messages we fall back to Latin-1 for the
+ native encoding. This is justified as one can expect that on a
+ utf-8 enabled system nl_langinfo() will work and thus we will
+ never get here. Thus Latin-1 seems to be a reasonable
+ default. */
+ active_charset_name = "iso-8859-1";
+ no_translation = 0;
+ active_charset = NULL;
+ use_iconv = 0;
+ }
+}
+
+/* Select the native character set. If NEWSET is NULL the charset is
+ detected: on W32 from the console output code page (falling back to
+ the system code page), otherwise via nl_langinfo (CODESET) or, if
+ that is not available, from the codeset part of LC_ALL, LC_CTYPE or
+ LANG. A leading "iso" with an optional '-' or '_' is ignored when
+ comparing names.
+
+ An empty name, "8859-1", "646", "ASCII" and "ANSI_X3.4-1968" select
+ the built-in Latin-1 conversion; "utf8" and "utf-8" select plain
+ pass-through. Any other name is probed with iconv_open in both
+ directions and, if both work, selected for use with iconv.
+
+ Returns 0 on success or -1 if the charset is not supported; in the
+ latter case the previously active charset is left unchanged.
+
+ NOTE(review): for iconv charsets the pointer NEWSET itself is
+ remembered, not a copy - confirm that callers keep the string
+ alive. The W32 branch needs GetConsoleOutputCP, GetACP and
+ load_libiconv to be declared by headers not visible here - TODO
+ confirm. */
int
set_native_charset (const char *newset)
{
- if (!newset)
+ const char *full_newset;
+
+ if (!newset)
+ {
+#ifdef HAVE_W32_SYSTEM
+ static char codepage[30];
+ unsigned int cpno;
+ const char *aliases;
+
+ /* We are a console program thus we need to use the
+ GetConsoleOutputCP function and not the GetACP which
+ would give the codepage for a GUI program. Note this is not
+ a bulletproof detection because GetConsoleCP might return a
+ different one for console input. Not sure how to cope with
+ that. If the console Code page is not known we fall back to
+ the system code page. */
+ cpno = GetConsoleOutputCP ();
+ if (!cpno)
+ cpno = GetACP ();
+ sprintf (codepage, "CP%u", cpno );
+ /* Resolve alias. We use a long string and not the usual
+ array to optimize if the code is taken to a DSO. Taken from
+ libiconv 1.9.2. */
+ newset = codepage;
+ for (aliases = ("CP936" "\0" "GBK" "\0"
+ "CP1361" "\0" "JOHAB" "\0"
+ "CP20127" "\0" "ASCII" "\0"
+ "CP20866" "\0" "KOI8-R" "\0"
+ "CP21866" "\0" "KOI8-RU" "\0"
+ "CP28591" "\0" "ISO-8859-1" "\0"
+ "CP28592" "\0" "ISO-8859-2" "\0"
+ "CP28593" "\0" "ISO-8859-3" "\0"
+ "CP28594" "\0" "ISO-8859-4" "\0"
+ "CP28595" "\0" "ISO-8859-5" "\0"
+ "CP28596" "\0" "ISO-8859-6" "\0"
+ "CP28597" "\0" "ISO-8859-7" "\0"
+ "CP28598" "\0" "ISO-8859-8" "\0"
+ "CP28599" "\0" "ISO-8859-9" "\0"
+ "CP28605" "\0" "ISO-8859-15" "\0"
+ "CP65001" "\0" "UTF-8" "\0");
+ *aliases;
+ aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
+ {
+ if (!strcmp (codepage, aliases) ||(*aliases == '*' && !aliases[1]))
+ {
+ newset = aliases + strlen (aliases) + 1;
+ break;
+ }
+ }
+
+#else /*!HAVE_W32_SYSTEM*/
+
#ifdef HAVE_LANGINFO_CODESET
- newset = nl_langinfo (CODESET);
-#else
- newset = "8859-1";
-#endif
+ newset = nl_langinfo (CODESET);
+#else /*!HAVE_LANGINFO_CODESET*/
+ /* Try to get the used charset from environment variables. */
+ static char codepage[30];
+ const char *lc, *dot, *mod;
+
+ strcpy (codepage, "iso-8859-1");
+ lc = getenv ("LC_ALL");
+ if (!lc || !*lc)
+ {
+ lc = getenv ("LC_CTYPE");
+ if (!lc || !*lc)
+ lc = getenv ("LANG");
+ }
+ if (lc && *lc)
+ {
+ dot = strchr (lc, '.');
+ if (dot)
+ {
+ mod = strchr (++dot, '@');
+ if (!mod)
+ mod = dot + strlen (dot);
+ if (mod - dot < sizeof codepage && dot != mod)
+ {
+ memcpy (codepage, dot, mod - dot);
+ codepage [mod - dot] = 0;
+ }
+ }
+ }
+ newset = codepage;
+#endif /*!HAVE_LANGINFO_CODESET*/
+#endif /*!HAVE_W32_SYSTEM*/
+ }
+ full_newset = newset;
if (strlen (newset) > 3 && !ascii_memcasecmp (newset, "iso", 3))
{
newset += 3;
if (*newset == '-' || *newset == '_')
- newset++;
+ newset++;
}
- if (!*newset
- || !ascii_strcasecmp (newset, "8859-1")
- || !ascii_strcasecmp (newset, "8859-15"))
+ /* Note that we silently assume that plain ASCII is actually meant
+ as Latin-1. This makes sense because many Unix system don't have
+ their locale set up properly and thus would get annoying error
+ messages and we have to handle all the "bug" reports. Latin-1 has
+ always been the character set used for 8 bit characters on Unix
+ systems. */
+ if ( !*newset
+ || !ascii_strcasecmp (newset, "8859-1" )
+ || !ascii_strcasecmp (newset, "646" )
+ || !ascii_strcasecmp (newset, "ASCII" )
+ || !ascii_strcasecmp (newset, "ANSI_X3.4-1968" )
+ )
{
active_charset_name = "iso-8859-1";
no_translation = 0;
active_charset = NULL;
+ use_iconv = 0;
}
- else if (!ascii_strcasecmp (newset, "8859-2"))
- {
- active_charset_name = "iso-8859-2";
- no_translation = 0;
- active_charset = latin2_unicode;
- }
- else if (!ascii_strcasecmp (newset, "koi8-r"))
- {
- active_charset_name = "koi8-r";
- no_translation = 0;
- active_charset = koi8_unicode;
- }
- else if (!ascii_strcasecmp (newset, "utf8")
- || !ascii_strcasecmp (newset, "utf-8"))
+ else if ( !ascii_strcasecmp (newset, "utf8" )
+ || !ascii_strcasecmp(newset, "utf-8") )
{
active_charset_name = "utf-8";
no_translation = 1;
active_charset = NULL;
+ use_iconv = 0;
}
else
- return -1;
+ {
+ iconv_t cd;
+
+#ifdef HAVE_W32_SYSTEM
+ if (load_libiconv ())
+ return -1;
+#endif /*HAVE_W32_SYSTEM*/
+
+ cd = iconv_open (full_newset, "utf-8");
+ if (cd == (iconv_t)-1)
+ {
+ handle_iconv_error (full_newset, "utf-8", 0);
+ return -1;
+ }
+ iconv_close (cd);
+ cd = iconv_open ("utf-8", full_newset);
+ if (cd == (iconv_t)-1)
+ {
+ handle_iconv_error ("utf-8", full_newset, 0);
+ return -1;
+ }
+ iconv_close (cd);
+ active_charset_name = full_newset;
+ no_translation = 0;
+ active_charset = NULL;
+ use_iconv = 1;
+ }
return 0;
}
@@ -132,10 +256,9 @@ get_native_charset ()
return active_charset_name;
}
-/****************
- * Convert string, which is in native encoding to UTF8 and return the
- * new allocated UTF8 string.
- */
+
+/* Convert string, which is in native encoding to UTF8 and return a
+ new allocated UTF-8 string. */
char *
native_to_utf8 (const char *orig_string)
{
@@ -147,41 +270,12 @@ native_to_utf8 (const char *orig_string)
if (no_translation)
{
+ /* Already utf-8 encoded. */
buffer = jnlib_xstrdup (orig_string);
}
- else if (active_charset)
- {
- for (s = string; *s; s++)
- {
- length++;
- if (*s & 0x80)
- length += 2; /* we may need 3 bytes */
- }
- buffer = jnlib_xmalloc (length + 1);
- for (p = (unsigned char *)buffer, s = string; *s; s++)
- {
- if ((*s & 0x80))
- {
- ushort val = active_charset[*s & 0x7f];
- if (val < 0x0800)
- {
- *p++ = 0xc0 | ((val >> 6) & 0x1f);
- *p++ = 0x80 | (val & 0x3f);
- }
- else
- {
- *p++ = 0xe0 | ((val >> 12) & 0x0f);
- *p++ = 0x80 | ((val >> 6) & 0x3f);
- *p++ = 0x80 | (val & 0x3f);
- }
- }
- else
- *p++ = *s;
- }
- *p = 0;
- }
- else
+ else if (!active_charset && !use_iconv)
{
+ /* For Latin-1 we can avoid the iconv overhead. */
for (s = string; *s; s++)
{
length++;
@@ -191,7 +285,7 @@ native_to_utf8 (const char *orig_string)
buffer = jnlib_xmalloc (length + 1);
for (p = (unsigned char *)buffer, s = string; *s; s++)
{
- if (*s & 0x80)
+ if ( (*s & 0x80 ))
{
*p++ = 0xc0 | ((*s >> 6) & 3);
*p++ = 0x80 | (*s & 0x3f);
@@ -201,22 +295,68 @@ native_to_utf8 (const char *orig_string)
}
*p = 0;
}
+ else
+ {
+ /* Need to use iconv. */
+ iconv_t cd;
+ const char *inptr;
+ char *outptr;
+ size_t inbytes, outbytes;
+
+ cd = iconv_open ("utf-8", active_charset_name);
+ if (cd == (iconv_t)-1)
+ {
+ handle_iconv_error ("utf-8", active_charset_name, 1);
+ return native_to_utf8 (string);
+ }
+
+ for (s=string; *s; s++ )
+ {
+ length++;
+ if ((*s & 0x80))
+ length += 5; /* We may need up to 6 bytes for the utf8 output. */
+ }
+ buffer = jnlib_xmalloc (length + 1);
+
+ inptr = string;
+ inbytes = strlen (string);
+ outptr = buffer;
+ outbytes = length;
+ if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
+ &outptr, &outbytes) == (size_t)-1)
+ {
+ static int shown;
+
+ if (!shown)
+ log_info (_("conversion from `%s' to `%s' failed: %s\n"),
+ active_charset_name, "utf-8", strerror (errno));
+ shown = 1;
+ /* We don't do any conversion at all but use the strings as is. */
+ strcpy (buffer, string);
+ }
+ else /* Success. */
+ {
+ *outptr = 0;
+ /* We could realloc the buffer now but I doubt that it makes
+ much sense given that it will get freed anyway soon
+ after. */
+ }
+ iconv_close (cd);
+ }
return buffer;
}
-/* Convert string, which is in UTF8 to native encoding. Replace
- * illegal encodings by some "\xnn" and quote all control
- * characters. A character with value DELIM will always be quoted, it
- * must be a vanilla ASCII character. */
-char *
-utf8_to_native (const char *string, size_t length, int delim)
+
+static char *
+do_utf8_to_native (const char *string, size_t length, int delim,
+ int with_iconv)
{
int nleft;
int i;
unsigned char encbuf[8];
int encidx;
- const byte *s;
+ const unsigned char *s;
size_t n;
char *buffer = NULL;
char *p = NULL;
@@ -224,19 +364,20 @@ utf8_to_native (const char *string, size_t length, int delim)
size_t slen;
int resync = 0;
- /* 1. pass (p==NULL): count the extended utf-8 characters */
- /* 2. pass (p!=NULL): create string */
+ /* First pass (p==NULL): count the extended utf-8 characters. */
+ /* Second pass (p!=NULL): create string. */
for (;;)
{
for (slen = length, nleft = encidx = 0, n = 0,
- s = (const unsigned char *)string; slen;
+ s = (const unsigned char *)string;
+ slen;
s++, slen--)
{
if (resync)
{
if (!(*s < 128 || (*s >= 0xc0 && *s <= 0xfd)))
{
- /* still invalid */
+ /* Still invalid. */
if (p)
{
sprintf (p, "\\x%02x", *s);
@@ -250,45 +391,23 @@ utf8_to_native (const char *string, size_t length, int delim)
if (!nleft)
{
if (!(*s & 0x80))
- { /* plain ascii */
- if (*s < 0x20 || *s == 0x7f || *s == delim ||
- (delim && *s == '\\'))
+ {
+ /* Plain ascii. */
+ if ( delim != -1
+ && (*s < 0x20 || *s == 0x7f || *s == delim
+ || (delim && *s == '\\')))
{
n++;
if (p)
*p++ = '\\';
switch (*s)
{
- case '\n':
- n++;
- if (p)
- *p++ = 'n';
- break;
- case '\r':
- n++;
- if (p)
- *p++ = 'r';
- break;
- case '\f':
- n++;
- if (p)
- *p++ = 'f';
- break;
- case '\v':
- n++;
- if (p)
- *p++ = 'v';
- break;
- case '\b':
- n++;
- if (p)
- *p++ = 'b';
- break;
- case 0:
- n++;
- if (p)
- *p++ = '0';
- break;
+ case '\n': n++; if ( p ) *p++ = 'n'; break;
+ case '\r': n++; if ( p ) *p++ = 'r'; break;
+ case '\f': n++; if ( p ) *p++ = 'f'; break;
+ case '\v': n++; if ( p ) *p++ = 'v'; break;
+ case '\b': n++; if ( p ) *p++ = 'b'; break;
+ case 0: n++; if ( p ) *p++ = '0'; break;
default:
n += 3;
if (p)
@@ -306,43 +425,43 @@ utf8_to_native (const char *string, size_t length, int delim)
n++;
}
}
- else if ((*s & 0xe0) == 0xc0)
- { /* 110x xxxx */
+ else if ((*s & 0xe0) == 0xc0) /* 110x xxxx */
+ {
val = *s & 0x1f;
nleft = 1;
encidx = 0;
encbuf[encidx++] = *s;
}
- else if ((*s & 0xf0) == 0xe0)
- { /* 1110 xxxx */
+ else if ((*s & 0xf0) == 0xe0) /* 1110 xxxx */
+ {
val = *s & 0x0f;
nleft = 2;
encidx = 0;
encbuf[encidx++] = *s;
}
- else if ((*s & 0xf8) == 0xf0)
- { /* 1111 0xxx */
+ else if ((*s & 0xf8) == 0xf0) /* 1111 0xxx */
+ {
val = *s & 0x07;
nleft = 3;
encidx = 0;
encbuf[encidx++] = *s;
}
- else if ((*s & 0xfc) == 0xf8)
- { /* 1111 10xx */
+ else if ((*s & 0xfc) == 0xf8) /* 1111 10xx */
+ {
val = *s & 0x03;
nleft = 4;
encidx = 0;
encbuf[encidx++] = *s;
}
- else if ((*s & 0xfe) == 0xfc)
- { /* 1111 110x */
+ else if ((*s & 0xfe) == 0xfc) /* 1111 110x */
+ {
val = *s & 0x01;
nleft = 5;
encidx = 0;
encbuf[encidx++] = *s;
}
- else
- { /* invalid encoding: print as \xnn */
+ else /* Invalid encoding: print as \xNN. */
+ {
if (p)
{
sprintf (p, "\\x%02x", *s);
@@ -352,8 +471,8 @@ utf8_to_native (const char *string, size_t length, int delim)
resync = 1;
}
}
- else if (*s < 0x80 || *s >= 0xc0)
- { /* invalid */
+ else if (*s < 0x80 || *s >= 0xc0) /* Invalid utf-8 */
+ {
if (p)
{
for (i = 0; i < encidx; i++)
@@ -374,8 +493,8 @@ utf8_to_native (const char *string, size_t length, int delim)
encbuf[encidx++] = *s;
val <<= 6;
val |= *s & 0x3f;
- if (!--nleft)
- { /* ready */
+ if (!--nleft) /* Ready. */
+ {
if (no_translation)
{
if (p)
@@ -386,43 +505,41 @@ utf8_to_native (const char *string, size_t length, int delim)
n += encidx;
encidx = 0;
}
- else if (active_charset)
- { /* table lookup */
- for (i = 0; i < 128; i++)
- {
- if (active_charset[i] == val)
- break;
- }
- if (i < 128)
- { /* we can print this one */
- if (p)
- *p++ = i + 128;
- n++;
- }
- else
- { /* we do not have a translation: print utf8 */
- if (p)
- {
- for (i = 0; i < encidx; i++)
- {
- sprintf (p, "\\x%02x", encbuf[i]);
- p += 4;
- }
- }
- n += encidx * 4;
- encidx = 0;
- }
- }
- else
- { /* native set */
+ else if (with_iconv)
+ {
+ /* Our strategy for using iconv is a bit strange
+ but it better keeps compatibility with
+ previous versions in regard to how invalid
+ encodings are displayed. What we do is to
+ keep the utf-8 as is and have the real
+ translation step then at the end. Yes, I
+ know that this is ugly. However we are short
+ of the 1.4 release and for this branch we
+ should not mess too much around with iconv
+ things. One reason for this is that we don't
+ know enough about non-GNU iconv
+ implementation and want to minimize the risk
+ of breaking the code on too many platforms. */
+ if ( p )
+ {
+ for (i=0; i < encidx; i++ )
+ *p++ = encbuf[i];
+ }
+ n += encidx;
+ encidx = 0;
+ }
+ else /* Latin-1 case. */
+ {
if (val >= 0x80 && val < 256)
{
- n++; /* we can simply print this character */
+ /* We can simply print this character */
+ n++;
if (p)
*p++ = val;
}
else
- { /* we do not have a translation: print utf8 */
+ {
+ /* We do not have a translation: print utf8. */
if (p)
{
for (i = 0; i < encidx; i++)
@@ -440,13 +557,78 @@ utf8_to_native (const char *string, size_t length, int delim)
}
}
if (!buffer)
- { /* allocate the buffer after the first pass */
+ {
+ /* Allocate the buffer after the first pass. */
buffer = p = jnlib_xmalloc (n + 1);
}
- else
+ else if (with_iconv)
+ {
+ /* Note: See above for comments. */
+ iconv_t cd;
+ const char *inptr;
+ char *outbuf, *outptr;
+ size_t inbytes, outbytes;
+
+ *p = 0; /* Terminate the buffer. */
+
+ cd = iconv_open (active_charset_name, "utf-8");
+ if (cd == (iconv_t)-1)
+ {
+ handle_iconv_error (active_charset_name, "utf-8", 1);
+ jnlib_free (buffer);
+ return utf8_to_native (string, length, delim);
+ }
+
+ /* Allocate a new buffer large enough to hold all possible
+ encodings. */
+ n = p - buffer + 1;
+ inbytes = n - 1;;
+ inptr = buffer;
+ outbytes = n * MB_LEN_MAX;
+ if (outbytes / MB_LEN_MAX != n)
+ BUG (); /* Actually an overflow. */
+ outbuf = outptr = jnlib_xmalloc (outbytes);
+ if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
+ &outptr, &outbytes) == (size_t)-1)
+ {
+ static int shown;
+
+ if (!shown)
+ log_info (_("conversion from `%s' to `%s' failed: %s\n"),
+ "utf-8", active_charset_name, strerror (errno));
+ shown = 1;
+ /* That didn't work out. Try again but without iconv. */
+ jnlib_free (buffer);
+ buffer = NULL;
+ jnlib_free (outbuf);
+ outbuf = do_utf8_to_native (string, length, delim, 0);
+ }
+ else /* Success. */
+ {
+ *outptr = 0; /* Make sure it is a string. */
+ /* We could realloc the buffer now but I doubt that it
+ makes much sense given that it will get freed
+ anyway soon after. */
+ jnlib_free (buffer);
+ }
+ iconv_close (cd);
+ return outbuf;
+ }
+ else /* Not using iconv. */
{
- *p = 0; /* make a string */
+ *p = 0; /* Make sure it is a string. */
return buffer;
}
}
}
+
+/* Convert STRING, which is LENGTH bytes of UTF-8, to the native
+ encoding and return the result as a newly allocated string.
+ Illegal encodings are replaced by some "\xnn" and all control
+ characters are quoted. A character with value DELIM will always
+ be quoted; it must be a vanilla ASCII character. A DELIM value of
+ -1 is special: it disables all quoting of control characters.
+ This is a wrapper around do_utf8_to_native which passes the
+ current use_iconv setting. */
+char *
+utf8_to_native (const char *string, size_t length, int delim)
+{
+ return do_utf8_to_native (string, length, delim, use_iconv);
+}