summaryrefslogtreecommitdiffstats
path: root/util/strgutil.c
diff options
context:
space:
mode:
Diffstat (limited to 'util/strgutil.c')
-rw-r--r--util/strgutil.c606
1 files changed, 499 insertions, 107 deletions
diff --git a/util/strgutil.c b/util/strgutil.c
index 89722f8e4..ff1ff5126 100644
--- a/util/strgutil.c
+++ b/util/strgutil.c
@@ -1,5 +1,5 @@
/* strgutil.c - string utilities
- * Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+ * Copyright (C) 1994, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
@@ -22,7 +22,6 @@
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
-#include <gcrypt.h>
#include "types.h"
#include "util.h"
#include "memory.h"
@@ -66,29 +65,10 @@ static ushort latin2_unicode[128] = {
0x0159,0x016F,0x00FA,0x0171,0x00FC,0x00FD,0x0163,0x02D9
};
-static ushort ibm850_unicode[128] = {
- 0x00c7,0x00fc,0x00e9,0x00e2,0x00e4,0x00e0,0x00e5,0x00e7,
- 0x00ea,0x00eb,0x00e8,0x00ef,0x00ee,0x00ec,0x00c4,0x00c5,
- 0x00c9,0x00e6,0x00c6,0x00f4,0x00f6,0x00f2,0x00fb,0x00f9,
- 0x00ff,0x00d6,0x00dc,0x00f8,0x00a3,0x00d8,0x00d7,0x0192,
- 0x00e1,0x00ed,0x00f3,0x00fa,0x00f1,0x00d1,0x00aa,0x00ba,
- 0x00bf,0x00ae,0x00ac,0x00bd,0x00bc,0x00a1,0x00ab,0x00bb,
- 0x2591,0x2592,0x2593,0x2502,0x2524,0x00c1,0x00c2,0x00c0,
- 0x00a9,0x2563,0x2551,0x2557,0x255d,0x00a2,0x00a5,0x2510,
- 0x2514,0x2534,0x252c,0x251c,0x2500,0x253c,0x00e3,0x00c3,
- 0x255a,0x2554,0x2569,0x2566,0x2560,0x2550,0x256c,0x00a4,
- 0x00f0,0x00d0,0x00ca,0x00cb,0x00c8,0x0131,0x00cd,0x00ce,
- 0x00cf,0x2518,0x250c,0x2588,0x2584,0x00a6,0x00cc,0x2580,
- 0x00d3,0x00df,0x00d4,0x00d2,0x00f5,0x00d5,0x00b5,0x00fe,
- 0x00de,0x00da,0x00db,0x00d9,0x00fd,0x00dd,0x00af,0x00b4,
- 0x00ad,0x00b1,0x2017,0x00be,0x00b6,0x00a7,0x00f7,0x00b8,
- 0x00b0,0x00a8,0x00b7,0x00b9,0x00b3,0x00b2,0x25a0,0x00a0,
-};
-static int query_native_charset_done = 0;
static const char *active_charset_name = "iso-8859-1";
static ushort *active_charset = NULL;
-
+static int no_translation = 0;
void
free_strlist( STRLIST sl )
@@ -97,7 +77,7 @@ free_strlist( STRLIST sl )
for(; sl; sl = sl2 ) {
sl2 = sl->next;
- gcry_free(sl);
+ m_free(sl);
}
}
@@ -107,7 +87,7 @@ add_to_strlist( STRLIST *list, const char *string )
{
STRLIST sl;
- sl = gcry_xmalloc( sizeof *sl + strlen(string));
+ sl = m_alloc( sizeof *sl + strlen(string));
sl->flags = 0;
strcpy(sl->d, string);
sl->next = *list;
@@ -129,7 +109,7 @@ add_to_strlist2( STRLIST *list, const char *string, int is_utf8 )
else {
char *p = native_to_utf8( string );
sl = add_to_strlist( list, p );
- gcry_free( p );
+ m_free( p );
}
return sl;
}
@@ -139,7 +119,7 @@ append_to_strlist( STRLIST *list, const char *string )
{
STRLIST r, sl;
- sl = gcry_xmalloc( sizeof *sl + strlen(string));
+ sl = m_alloc( sizeof *sl + strlen(string));
sl->flags = 0;
strcpy(sl->d, string);
sl->next = NULL;
@@ -163,7 +143,7 @@ append_to_strlist2( STRLIST *list, const char *string, int is_utf8 )
else {
char *p = native_to_utf8( string );
sl = append_to_strlist( list, p );
- gcry_free( p );
+ m_free( p );
}
return sl;
}
@@ -188,99 +168,230 @@ strlist_last( STRLIST node )
return node;
}
+char *
+pop_strlist( STRLIST *list )
+{
+ char *str=NULL;
+ STRLIST sl=*list;
+ if(sl)
+ {
+ str=m_alloc(strlen(sl->d)+1);
+ strcpy(str,sl->d);
-int
-string_count_chr( const char *string, int c )
-{
- int count;
- for(count=0; *string; string++ )
- if( *string == c )
- count++;
- return count;
+ *list=sl->next;
+ m_free(sl);
+ }
+
+ return str;
}
+/****************
+ * look for the substring SUB in buffer and return a pointer to that
+ * substring in BUF or NULL if not found.
+ * Comparison is case-insensitive.
+ */
+const char *
+memistr( const char *buf, size_t buflen, const char *sub )
+{
+ const byte *t, *s ;
+ size_t n;
+
+ for( t=buf, n=buflen, s=sub ; n ; t++, n-- )
+ if( toupper(*t) == toupper(*s) ) {
+ for( buf=t++, buflen = n--, s++;
+ n && toupper(*t) == toupper(*s); t++, s++, n-- )
+ ;
+ if( !*s )
+ return buf;
+ t = buf; n = buflen; s = sub ;
+ }
+
+ return NULL ;
+}
-static const char*
-query_native_charset(void)
+const char *
+ascii_memistr( const char *buf, size_t buflen, const char *sub )
{
- #ifdef __MINGW32__
- unsigned int cp;
+ const byte *t, *s ;
+ size_t n;
+
+ for( t=buf, n=buflen, s=sub ; n ; t++, n-- )
+ if( ascii_toupper(*t) == ascii_toupper(*s) ) {
+ for( buf=t++, buflen = n--, s++;
+ n && ascii_toupper(*t) == ascii_toupper(*s); t++, s++, n-- )
+ ;
+ if( !*s )
+ return buf;
+ t = buf; n = buflen; s = sub ;
+ }
+
+ return NULL ;
+}
- cp = GetConsoleOutputCP();
- if( cp != GetConsoleCP() ) {
- /* The input cgarset is not equal to the output charset
- * our system depends on it and therefore we will set
- * same the same (this won't work on Windows 95) */
- if( !SetConsoleCP( cp ) )
- log_info("can't set Input-CP to Output-CP: %d\n",
- (int)GetLastError() );
+/****************
+ * Wie strncpy(), aber es werden maximal n-1 zeichen kopiert und ein
+ * '\0' angehängt. Ist n = 0, so geschieht nichts, ist Destination
+ * gleich NULL, so wird via m_alloc Speicher besorgt, ist dann nicht
+ * genügend Speicher vorhanden, so bricht die funktion ab.
+ */
+char *
+mem2str( char *dest , const void *src , size_t n )
+{
+ char *d;
+ const char *s;
+
+ if( n ) {
+ if( !dest )
+ dest = m_alloc( n ) ;
+ d = dest;
+ s = src ;
+ for(n--; n && *s; n-- )
+ *d++ = *s++;
+ *d = '\0' ;
}
- /* we could read the registry, but this seems to be too much work */
- switch( cp ) {
- case 850: return "ibm850";
- case 437: return "ibm437";
- case 1252: return "iso-8859-1";
- default:
- log_info("unknown MS-Windows CodePage %u "
- "- trying to switch to Latin-1\n", cp );
- /* try to set latin-1 */
- if( !SetConsoleOutputCP( 1252 ) ) {
- if( !SetConsoleCP( 1252 ) )
- return "iso-8859-1";
- else /* back off */
- SetConsoleOutputCP( cp );
+
+ return dest ;
+}
+
+
+/****************
+ * remove leading and trailing white spaces
+ */
+char *
+trim_spaces( char *str )
+{
+ char *string, *p, *mark;
+
+ string = str;
+ /* find first non space character */
+ for( p=string; *p && isspace( *(byte*)p ) ; p++ )
+ ;
+ /* move characters */
+ for( (mark = NULL); (*string = *p); string++, p++ )
+ if( isspace( *(byte*)p ) ) {
+ if( !mark )
+ mark = string ;
+ }
+ else
+ mark = NULL ;
+ if( mark )
+ *mark = '\0' ; /* remove trailing spaces */
+
+ return str ;
+}
+
+
+
+unsigned int
+trim_trailing_chars( byte *line, unsigned len, const char *trimchars )
+{
+ byte *p, *mark;
+ unsigned n;
+
+ for(mark=NULL, p=line, n=0; n < len; n++, p++ ) {
+ if( strchr(trimchars, *p ) ) {
+ if( !mark )
+ mark = p;
}
- log_info("no information about MS-Windows CodePage %u\n", cp );
- return NULL;
+ else
+ mark = NULL;
}
- #else
- return NULL; /* unknown */
- #endif
+
+ if( mark ) {
+ *mark = 0;
+ return mark - line;
+ }
+ return len;
}
+/****************
+ * remove trailing white spaces and return the length of the buffer
+ */
+unsigned
+trim_trailing_ws( byte *line, unsigned len )
+{
+ return trim_trailing_chars( line, len, " \t\r\n" );
+}
-const char*
-get_native_charset()
+unsigned int
+check_trailing_chars( const byte *line, unsigned int len,
+ const char *trimchars )
{
- if( !query_native_charset_done ) {
- const char *s;
+ const byte *p, *mark;
+ unsigned int n;
- query_native_charset_done = 1;
- s = query_native_charset();
- if( s )
- set_native_charset(s);
+ for(mark=NULL, p=line, n=0; n < len; n++, p++ ) {
+ if( strchr(trimchars, *p ) ) {
+ if( !mark )
+ mark = p;
+ }
+ else
+ mark = NULL;
}
- return active_charset_name;
+ if( mark ) {
+ return mark - line;
+ }
+ return len;
+}
+
+/****************
+ * remove trailing white spaces and return the length of the buffer
+ */
+unsigned int
+check_trailing_ws( const byte *line, unsigned int len )
+{
+ return check_trailing_chars( line, len, " \t\r\n" );
+}
+
+
+
+int
+string_count_chr( const char *string, int c )
+{
+ int count;
+ for(count=0; *string; string++ )
+ if( *string == c )
+ count++;
+ return count;
}
int
set_native_charset( const char *newset )
{
- query_native_charset_done = 1; /* don't do this when we want to set one*/
- if( !stricmp( newset, "iso-8859-1" ) ) {
+ if( !ascii_strcasecmp( newset, "iso-8859-1" ) ) {
active_charset_name = "iso-8859-1";
+ no_translation = 0;
active_charset = NULL;
}
- else if( !stricmp( newset, "iso-8859-2" ) ) {
+ else if( !ascii_strcasecmp( newset, "iso-8859-2" ) ) {
active_charset_name = "iso-8859-2";
+ no_translation = 0;
active_charset = latin2_unicode;
}
- else if( !stricmp( newset, "koi8-r" ) ) {
+ else if( !ascii_strcasecmp( newset, "koi8-r" ) ) {
active_charset_name = "koi8-r";
+ no_translation = 0;
active_charset = koi8_unicode;
}
- else if( !stricmp( newset, "ibm850" ) || !stricmp( newset, "ibm437" ) ) {
- active_charset_name = "ibm850";
- active_charset = ibm850_unicode;
+ else if( !ascii_strcasecmp (newset, "utf8" )
+ || !ascii_strcasecmp(newset, "utf-8") ) {
+ active_charset_name = "utf-8";
+ no_translation = 1;
+ active_charset = NULL;
}
else
- return GPGERR_GENERAL;
+ return G10ERR_GENERAL;
return 0;
}
+const char*
+get_native_charset()
+{
+ return active_charset_name;
+}
/****************
* Convert string, which is in native encoding to UTF8 and return the
@@ -294,13 +405,16 @@ native_to_utf8( const char *string )
byte *p;
size_t length=0;
- if( active_charset ) {
+ if (no_translation) {
+ buffer = m_strdup (string);
+ }
+ else if( active_charset ) {
for(s=string; *s; s++ ) {
length++;
if( *s & 0x80 )
length += 2; /* we may need 3 bytes */
}
- buffer = gcry_xmalloc( length + 1 );
+ buffer = m_alloc( length + 1 );
for(p=buffer, s=string; *s; s++ ) {
if( *s & 0x80 ) {
ushort val = active_charset[ *s & 0x7f ];
@@ -325,7 +439,7 @@ native_to_utf8( const char *string )
if( *s & 0x80 )
length++;
}
- buffer = gcry_xmalloc( length + 1 );
+ buffer = m_alloc( length + 1 );
for(p=buffer, s=string; *s; s++ ) {
if( *s & 0x80 ) {
*p++ = 0xc0 | ((*s >> 6) & 3);
@@ -341,15 +455,17 @@ native_to_utf8( const char *string )
/****************
- * Convert string, which is in UTF8 to native encoding.
- * illegal encodings by some "\xnn" and quote all control characters
- */
+ * Convert string, which is in UTF8 to native encoding. illegal
+ * encodings by some "\xnn" and quote all control characters. A
+ * character with value DELIM will always be quoted, it must be a
+ * vanilla ASCII character.
+ */
char *
-utf8_to_native( const char *string, size_t length )
+utf8_to_native( const char *string, size_t length, int delim )
{
int nleft;
int i;
- byte encbuf[7];
+ byte encbuf[8];
int encidx;
const byte *s;
size_t n;
@@ -376,7 +492,8 @@ utf8_to_native( const char *string, size_t length )
}
if( !nleft ) {
if( !(*s & 0x80) ) { /* plain ascii */
- if( iscntrl( *s ) ) {
+ if( *s < 0x20 || *s == 0x7f || *s == delim ||
+ (delim && *s=='\\')) {
n++;
if( p )
*p++ = '\\';
@@ -387,11 +504,13 @@ utf8_to_native( const char *string, size_t length )
case '\v': n++; if( p ) *p++ = 'v'; break;
case '\b': n++; if( p ) *p++ = 'b'; break;
case 0 : n++; if( p ) *p++ = '0'; break;
- default: n += 3;
- sprintf( p, "x%02x", *s );
- if ( p )
- p += 3;
- break;
+ default:
+ n += 3;
+ if ( p ) {
+ sprintf( p, "x%02x", *s );
+ p += 3;
+ }
+ break;
}
}
else {
@@ -402,27 +521,32 @@ utf8_to_native( const char *string, size_t length )
else if( (*s & 0xe0) == 0xc0 ) { /* 110x xxxx */
val = *s & 0x1f;
nleft = 1;
- encbuf[encidx=0] = *s;
+ encidx = 0;
+ encbuf[encidx++] = *s;
}
else if( (*s & 0xf0) == 0xe0 ) { /* 1110 xxxx */
val = *s & 0x0f;
nleft = 2;
- encbuf[encidx=0] = *s;
+ encidx = 0;
+ encbuf[encidx++] = *s;
}
else if( (*s & 0xf8) == 0xf0 ) { /* 1111 0xxx */
val = *s & 0x07;
nleft = 3;
- encbuf[encidx=0] = *s;
+ encidx = 0;
+ encbuf[encidx++] = *s;
}
else if( (*s & 0xfc) == 0xf8 ) { /* 1111 10xx */
val = *s & 0x03;
nleft = 4;
- encbuf[encidx=0] = *s;
+ encidx = 0;
+ encbuf[encidx++] = *s;
}
else if( (*s & 0xfe) == 0xfc ) { /* 1111 110x */
val = *s & 0x01;
nleft = 5;
- encbuf[encidx=0] = *s;
+ encidx = 0;
+ encbuf[encidx++] = *s;
}
else { /* invalid encoding: print as \xnn */
if( p ) {
@@ -435,19 +559,32 @@ utf8_to_native( const char *string, size_t length )
}
else if( *s < 0x80 || *s >= 0xc0 ) { /* invalid */
if( p ) {
+ for(i=0; i < encidx; i++ ) {
+ sprintf(p, "\\x%02x", encbuf[i] );
+ p += 4;
+ }
sprintf(p, "\\x%02x", *s );
p += 4;
}
- n += 4;
+ n += 4 + 4*encidx;
nleft = 0;
+ encidx = 0;
resync = 1;
}
else {
- encbuf[++encidx] = *s;
+ encbuf[encidx++] = *s;
val <<= 6;
val |= *s & 0x3f;
if( !--nleft ) { /* ready */
- if( active_charset ) { /* table lookup */
+ if (no_translation) {
+ if( p ) {
+ for(i=0; i < encidx; i++ )
+ *p++ = encbuf[i];
+ }
+ n += encidx;
+ encidx = 0;
+ }
+ else if( active_charset ) { /* table lookup */
for(i=0; i < 128; i++ ) {
if( active_charset[i] == val )
break;
@@ -464,6 +601,7 @@ utf8_to_native( const char *string, size_t length )
}
}
n += encidx*4;
+ encidx = 0;
}
}
else { /* native set */
@@ -479,15 +617,15 @@ utf8_to_native( const char *string, size_t length )
}
}
n += encidx*4;
+ encidx = 0;
}
}
-
}
}
}
if( !buffer ) { /* allocate the buffer after the first pass */
- buffer = p = gcry_xmalloc( n + 1 );
+ buffer = p = m_alloc( n + 1 );
}
else {
*p = 0; /* make a string */
@@ -496,8 +634,262 @@ utf8_to_native( const char *string, size_t length )
}
}
+/****************************************************
+ ******** locale insensitive ctype functions ********
+ ****************************************************/
+/* FIXME: replace them by a table lookup and macros */
+int
+ascii_isupper (int c)
+{
+ return c >= 'A' && c <= 'Z';
+}
+
+int
+ascii_islower (int c)
+{
+ return c >= 'a' && c <= 'z';
+}
+
+int
+ascii_toupper (int c)
+{
+ if (c >= 'a' && c <= 'z')
+ c &= ~0x20;
+ return c;
+}
+
+int
+ascii_tolower (int c)
+{
+ if (c >= 'A' && c <= 'Z')
+ c |= 0x20;
+ return c;
+}
+
+
+int
+ascii_strcasecmp( const char *a, const char *b )
+{
+ if (a == b)
+ return 0;
+
+ for (; *a && *b; a++, b++) {
+ if (*a != *b && ascii_toupper(*a) != ascii_toupper(*b))
+ break;
+ }
+ return *a == *b? 0 : (ascii_toupper (*a) - ascii_toupper (*b));
+}
+
+int
+ascii_memcasecmp( const char *a, const char *b, size_t n )
+{
+ if (a == b)
+ return 0;
+ for ( ; n; n--, a++, b++ ) {
+ if( *a != *b && ascii_toupper (*a) != ascii_toupper (*b) )
+ return *a == *b? 0 : (ascii_toupper (*a) - ascii_toupper (*b));
+ }
+ return 0;
+}
+
+/*********************************************
+ ********** missing string functions *********
+ *********************************************/
+#ifndef HAVE_STPCPY
+char *
+stpcpy(char *a,const char *b)
+{
+ while( *b )
+ *a++ = *b++;
+ *a = 0;
+ return (char*)a;
+}
+#endif
+
+
+#ifndef HAVE_STRSEP
+/* code taken from glibc-2.2.1/sysdeps/generic/strsep.c */
+char *
+strsep (char **stringp, const char *delim)
+{
+ char *begin, *end;
+
+ begin = *stringp;
+ if (begin == NULL)
+ return NULL;
+
+ /* A frequent case is when the delimiter string contains only one
+ character. Here we don't need to call the expensive `strpbrk'
+ function and instead work using `strchr'. */
+ if (delim[0] == '\0' || delim[1] == '\0')
+ {
+ char ch = delim[0];
+
+ if (ch == '\0')
+ end = NULL;
+ else
+ {
+ if (*begin == ch)
+ end = begin;
+ else if (*begin == '\0')
+ end = NULL;
+ else
+ end = strchr (begin + 1, ch);
+ }
+ }
+ else
+ /* Find the end of the token. */
+ end = strpbrk (begin, delim);
+
+ if (end)
+ {
+ /* Terminate the token and set *STRINGP past NUL character. */
+ *end++ = '\0';
+ *stringp = end;
+ }
+ else
+ /* No more delimiters; this is the last token. */
+ *stringp = NULL;
+
+ return begin;
+}
+#endif /*HAVE_STRSEP*/
+
+
+#ifndef HAVE_STRLWR
+char *
+strlwr(char *s)
+{
+ char *p;
+ for(p=s; *p; p++ )
+ *p = tolower(*p);
+ return s;
+}
+#endif
+
+#ifndef HAVE_STRCASECMP
+int
+strcasecmp( const char *a, const char *b )
+{
+ for( ; *a && *b; a++, b++ ) {
+ if( *a != *b && toupper(*a) != toupper(*b) )
+ break;
+ }
+ return *(const byte*)a - *(const byte*)b;
+}
+#endif
+
+#ifndef HAVE_STRNCASECMP
+int
+strncasecmp( const char *a, const char *b, size_t n )
+{
+ for( ; n && *a && *b; a++, b++, n--) {
+ if( *a != *b && toupper(*a) != toupper(*b) )
+ break;
+ }
+ if (!n)
+ return 0;
+ return *(const byte*)a - *(const byte*)b;
+}
+#endif
+
+
+#ifdef __MINGW32__
+/*
+ * Like vsprintf but provides a pointer to malloc'd storage, which
+ * must be freed by the caller (m_free). Taken from libiberty as
+ * found in gcc-2.95.2 and a little bit modernized.
+ * FIXME: Write a new CRT for W32.
+ */
+int
+vasprintf ( char **result, const char *format, va_list args)
+{
+ const char *p = format;
+ /* Add one to make sure that it is never zero, which might cause malloc
+ to return NULL. */
+ int total_width = strlen (format) + 1;
+ va_list ap;
+
+ /* this is not really portable but works under Windows */
+ memcpy ( &ap, &args, sizeof (va_list));
+
+ while (*p != '\0')
+ {
+ if (*p++ == '%')
+ {
+ while (strchr ("-+ #0", *p))
+ ++p;
+ if (*p == '*')
+ {
+ ++p;
+ total_width += abs (va_arg (ap, int));
+ }
+ else
+ {
+ char *endp;
+ total_width += strtoul (p, &endp, 10);
+ p = endp;
+ }
+ if (*p == '.')
+ {
+ ++p;
+ if (*p == '*')
+ {
+ ++p;
+ total_width += abs (va_arg (ap, int));
+ }
+ else
+ {
+ char *endp;
+ total_width += strtoul (p, &endp, 10);
+ p = endp;
+ }
+ }
+ while (strchr ("hlL", *p))
+ ++p;
+ /* Should be big enough for any format specifier except %s
+ and floats. */
+ total_width += 30;
+ switch (*p)
+ {
+ case 'd':
+ case 'i':
+ case 'o':
+ case 'u':
+ case 'x':
+ case 'X':
+ case 'c':
+ (void) va_arg (ap, int);
+ break;
+ case 'f':
+ case 'e':
+ case 'E':
+ case 'g':
+ case 'G':
+ (void) va_arg (ap, double);
+ /* Since an ieee double can have an exponent of 307, we'll
+ make the buffer wide enough to cover the gross case. */
+ total_width += 307;
+
+ case 's':
+ total_width += strlen (va_arg (ap, char *));
+ break;
+ case 'p':
+ case 'n':
+ (void) va_arg (ap, char *);
+ break;
+ }
+ }
+ }
+ *result = m_alloc (total_width);
+ if (*result != NULL)
+ return vsprintf (*result, format, args);
+ else
+ return 0;
+}
+#endif /*__MINGW32__*/