/* stringhelp.c - standard string helper functions
* Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005,
* 2006, 2007 Free Software Foundation, Inc.
*
* This file is part of JNLIB.
*
* JNLIB is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 3 of
* the License, or (at your option) any later version.
*
* JNLIB is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, see .
*/
#include
#include
#include
#include
#include
#ifdef HAVE_W32_SYSTEM
#include
#endif
#include "libjnlib-config.h"
#include "utf8conv.h"
#include "stringhelp.h"
#define tohex_lower(n) ((n) < 10 ? ((n) + '0') : (((n) - 10) + 'a'))
/*
* Look for the substring SUB in buffer and return a pointer to that
* substring in BUFFER or NULL if not found.
* Comparison is case-insensitive.
*/
const char *
memistr (const void *buffer, size_t buflen, const char *sub)
{
const unsigned char *buf = buffer;
const unsigned char *t = (const unsigned char *)buffer;
const unsigned char *s = (const unsigned char *)sub;
size_t n = buflen;
for ( ; n ; t++, n-- )
{
if ( toupper (*t) == toupper (*s) )
{
for ( buf=t++, buflen = n--, s++;
n && toupper (*t) == toupper (*s); t++, s++, n-- )
;
if (!*s)
return (const char*)buf;
t = buf;
s = (const unsigned char *)sub ;
n = buflen;
}
}
return NULL;
}
const char *
ascii_memistr ( const void *buffer, size_t buflen, const char *sub )
{
const unsigned char *buf = buffer;
const unsigned char *t = (const unsigned char *)buf;
const unsigned char *s = (const unsigned char *)sub;
size_t n = buflen;
for ( ; n ; t++, n-- )
{
if (ascii_toupper (*t) == ascii_toupper (*s) )
{
for ( buf=t++, buflen = n--, s++;
n && ascii_toupper (*t) == ascii_toupper (*s); t++, s++, n-- )
;
if (!*s)
return (const char*)buf;
t = (const unsigned char *)buf;
s = (const unsigned char *)sub ;
n = buflen;
}
}
return NULL;
}
/* This function is similar to strncpy(). However it won't copy more
than N - 1 characters and makes sure that a '\0' is appended. With
N given as 0, nothing will happen. With DEST given as NULL, memory
will be allocated using jnlib_xmalloc (i.e. if it runs out of core
the function terminates). Returns DES or a pointer to the
allocated memory.
*/
char *
mem2str( char *dest , const void *src , size_t n )
{
char *d;
const char *s;
if( n ) {
if( !dest )
dest = jnlib_xmalloc( n ) ;
d = dest;
s = src ;
for(n--; n && *s; n-- )
*d++ = *s++;
*d = '\0' ;
}
return dest ;
}
/****************
* remove leading and trailing white spaces
*/
char *
trim_spaces( char *str )
{
char *string, *p, *mark;
string = str;
/* find first non space character */
for( p=string; *p && isspace( *(byte*)p ) ; p++ )
;
/* move characters */
for( (mark = NULL); (*string = *p); string++, p++ )
if( isspace( *(byte*)p ) ) {
if( !mark )
mark = string ;
}
else
mark = NULL ;
if( mark )
*mark = '\0' ; /* remove trailing spaces */
return str ;
}
/****************
* remove trailing white spaces
*/
char *
trim_trailing_spaces( char *string )
{
char *p, *mark;
for( mark = NULL, p = string; *p; p++ ) {
if( isspace( *(byte*)p ) ) {
if( !mark )
mark = p;
}
else
mark = NULL;
}
if( mark )
*mark = '\0' ;
return string ;
}
unsigned
trim_trailing_chars( byte *line, unsigned len, const char *trimchars )
{
byte *p, *mark;
unsigned n;
for(mark=NULL, p=line, n=0; n < len; n++, p++ ) {
if( strchr(trimchars, *p ) ) {
if( !mark )
mark = p;
}
else
mark = NULL;
}
if( mark ) {
*mark = 0;
return mark - line;
}
return len;
}
/****************
* remove trailing white spaces and return the length of the buffer
*/
unsigned
trim_trailing_ws( byte *line, unsigned len )
{
return trim_trailing_chars( line, len, " \t\r\n" );
}
size_t
length_sans_trailing_chars (const unsigned char *line, size_t len,
const char *trimchars )
{
const unsigned char *p, *mark;
size_t n;
for( mark=NULL, p=line, n=0; n < len; n++, p++ )
{
if (strchr (trimchars, *p ))
{
if( !mark )
mark = p;
}
else
mark = NULL;
}
if (mark)
return mark - line;
return len;
}
/*
* Return the length of line ignoring trailing white-space.
*/
size_t
length_sans_trailing_ws (const unsigned char *line, size_t len)
{
return length_sans_trailing_chars (line, len, " \t\r\n");
}
/***************
* Extract from a given path the filename component.
*
*/
char *
make_basename(const char *filepath, const char *inputpath)
{
char *p;
#ifdef __riscos__
return riscos_make_basename(filepath, inputpath);
#endif
if ( !(p=strrchr(filepath, '/')) )
#ifdef HAVE_DRIVE_LETTERS
if ( !(p=strrchr(filepath, '\\')) )
if ( !(p=strrchr(filepath, ':')) )
#endif
{
return jnlib_xstrdup(filepath);
}
return jnlib_xstrdup(p+1);
}
/***************
* Extract from a given filename the path prepended to it.
* If their isn't a path prepended to the filename, a dot
* is returned ('.').
*
*/
char *
make_dirname(const char *filepath)
{
char *dirname;
int dirname_length;
char *p;
if ( !(p=strrchr(filepath, '/')) )
#ifdef HAVE_DRIVE_LETTERS
if ( !(p=strrchr(filepath, '\\')) )
if ( !(p=strrchr(filepath, ':')) )
#endif
{
return jnlib_xstrdup(".");
}
dirname_length = p-filepath;
dirname = jnlib_xmalloc(dirname_length+1);
strncpy(dirname, filepath, dirname_length);
dirname[dirname_length] = 0;
return dirname;
}
/****************
* Construct a filename from the NULL terminated list of parts.
* Tilde expansion is done here.
*/
char *
make_filename( const char *first_part, ... )
{
va_list arg_ptr ;
size_t n;
const char *s;
char *name, *home, *p;
va_start (arg_ptr, first_part);
n = strlen (first_part) + 1;
while ( (s = va_arg (arg_ptr, const char *)) )
n += strlen(s) + 1;
va_end(arg_ptr);
home = NULL;
if ( *first_part == '~' && first_part[1] == '/'
&& (home = getenv("HOME")) && *home )
n += strlen (home);
name = jnlib_xmalloc (n);
p = (home
? stpcpy (stpcpy (name,home), first_part + 1)
: stpcpy(name, first_part));
va_start (arg_ptr, first_part) ;
while ( (s = va_arg(arg_ptr, const char *)) )
p = stpcpy (stpcpy (p,"/"), s);
va_end(arg_ptr);
#ifdef HAVE_DRIVE_LETTERS
/* We better avoid mixing slashes and backslashes and prefer
backslashes. There is usual no problem with mixing them, however
a very few W32 API calls can't grok plain slashes. Printing
filenames with mixed slashes also looks a bit strange. */
if (strchr (name, '\\'))
{
for (p=name; *p; p++)
if (*p == '/')
*p = '\\';
}
#endif /*HAVE_DRIVE_LETTERS*/
return name;
}
/* Compare whether the filenames are identical. This is a
special version of strcmp() taking the semantics of filenames in
account. Note that this function works only on the supplied names
without considereing any context like the current directory. See
also same_file_p(). */
int
compare_filenames (const char *a, const char *b)
{
#ifdef HAVE_DRIVE_LETTERS
for ( ; *a && *b; a++, b++ )
{
if (*a != *b
&& (toupper (*(const unsigned char*)a)
!= toupper (*(const unsigned char*)b) )
&& !((*a == '/' && *b == '\\') || (*a == '\\' && *b == '/')))
break;
}
if ((*a == '/' && *b == '\\') || (*a == '\\' && *b == '/'))
return 0;
else
return (toupper (*(const unsigned char*)a)
- toupper (*(const unsigned char*)b));
#else
return strcmp(a,b);
#endif
}
/* Convert 2 hex characters at S to a byte value. Return this value
or -1 if there is an error. */
int
hextobyte (const char *s)
{
int c;
if ( *s >= '0' && *s <= '9' )
c = 16 * (*s - '0');
else if ( *s >= 'A' && *s <= 'F' )
c = 16 * (10 + *s - 'A');
else if ( *s >= 'a' && *s <= 'f' )
c = 16 * (10 + *s - 'a');
else
return -1;
s++;
if ( *s >= '0' && *s <= '9' )
c += *s - '0';
else if ( *s >= 'A' && *s <= 'F' )
c += 10 + *s - 'A';
else if ( *s >= 'a' && *s <= 'f' )
c += 10 + *s - 'a';
else
return -1;
return c;
}
/* Print a BUFFER to stream FP while replacing all control characters
and the characters DELIM and DELIM2 with standard C escape
sequences. Returns the number of characters printed. */
size_t
print_sanitized_buffer2 (FILE *fp, const void *buffer, size_t length,
int delim, int delim2)
{
const unsigned char *p = buffer;
size_t count = 0;
for (; length; length--, p++, count++)
{
/* Fixme: Check whether *p < 0xa0 is correct for utf8 encoding. */
if (*p < 0x20
|| (*p >= 0x7f && *p < 0xa0)
|| *p == delim
|| *p == delim2
|| ((delim || delim2) && *p=='\\'))
{
putc ('\\', fp);
count++;
if (*p == '\n')
{
putc ('n', fp);
count++;
}
else if (*p == '\r')
{
putc ('r', fp);
count++;
}
else if (*p == '\f')
{
putc ('f', fp);
count++;
}
else if (*p == '\v')
{
putc ('v', fp);
count++;
}
else if (*p == '\b')
{
putc ('b', fp);
count++;
}
else if (!*p)
{
putc('0', fp);
count++;
}
else
{
fprintf (fp, "x%02x", *p);
count += 3;
}
}
else
{
putc (*p, fp);
count++;
}
}
return count;
}
/* Same as print_sanitized_buffer2 but with just one delimiter. */
size_t
print_sanitized_buffer (FILE *fp, const void *buffer, size_t length,
int delim)
{
return print_sanitized_buffer2 (fp, buffer, length, delim, 0);
}
size_t
print_sanitized_utf8_buffer (FILE *fp, const void *buffer,
size_t length, int delim)
{
const char *p = buffer;
size_t i;
/* We can handle plain ascii simpler, so check for it first. */
for (i=0; i < length; i++ )
{
if ( (p[i] & 0x80) )
break;
}
if (i < length)
{
char *buf = utf8_to_native (p, length, delim);
/*(utf8 conversion already does the control character quoting)*/
i = strlen (buf);
fputs (buf, fp);
jnlib_free (buf);
return i;
}
else
return print_sanitized_buffer (fp, p, length, delim);
}
size_t
print_sanitized_string2 (FILE *fp, const char *string, int delim, int delim2)
{
return string? print_sanitized_buffer2 (fp, string, strlen (string),
delim, delim2):0;
}
size_t
print_sanitized_string (FILE *fp, const char *string, int delim)
{
return string? print_sanitized_buffer (fp, string, strlen (string), delim):0;
}
size_t
print_sanitized_utf8_string (FILE *fp, const char *string, int delim)
{
return string? print_sanitized_utf8_buffer (fp,
string, strlen (string),
delim) : 0;
}
/* Create a string from the buffer P_ARG of length N which is suitable for
printing. Caller must release the created string using xfree. */
char *
sanitize_buffer (const void *p_arg, size_t n, int delim)
{
const unsigned char *p = p_arg;
size_t save_n, buflen;
const unsigned char *save_p;
char *buffer, *d;
/* First count length. */
for (save_n = n, save_p = p, buflen=1 ; n; n--, p++ )
{
if ( *p < 0x20 || *p == 0x7f || *p == delim || (delim && *p=='\\'))
{
if ( *p=='\n' || *p=='\r' || *p=='\f'
|| *p=='\v' || *p=='\b' || !*p )
buflen += 2;
else
buflen += 5;
}
else
buflen++;
}
p = save_p;
n = save_n;
/* And now make the string */
d = buffer = jnlib_xmalloc( buflen );
for ( ; n; n--, p++ )
{
if (*p < 0x20 || *p == 0x7f || *p == delim || (delim && *p=='\\')) {
*d++ = '\\';
if( *p == '\n' )
*d++ = 'n';
else if( *p == '\r' )
*d++ = 'r';
else if( *p == '\f' )
*d++ = 'f';
else if( *p == '\v' )
*d++ = 'v';
else if( *p == '\b' )
*d++ = 'b';
else if( !*p )
*d++ = '0';
else {
sprintf(d, "x%02x", *p );
d += 3;
}
}
else
*d++ = *p;
}
*d = 0;
return buffer;
}
/* Given a string containing an UTF-8 encoded text, return the number
of characters in this string. It differs from strlen in that it
only counts complete UTF-8 characters. Note, that this function
does not take combined characters into account. */
size_t
utf8_charcount (const char *s)
{
size_t n;
for (n=0; *s; s++)
if ( (*s&0xc0) != 0x80 ) /* Exclude continuation bytes: 10xxxxxx */
n++;
return n;
}
/****************************************************
********** W32 specific functions ****************
****************************************************/
#ifdef HAVE_W32_SYSTEM
const char *
w32_strerror (int ec)
{
static char strerr[256];
if (ec == -1)
ec = (int)GetLastError ();
FormatMessage (FORMAT_MESSAGE_FROM_SYSTEM, NULL, ec,
MAKELANGID (LANG_NEUTRAL, SUBLANG_DEFAULT),
strerr, DIM (strerr)-1, NULL);
return strerr;
}
#endif /*HAVE_W32_SYSTEM*/
/****************************************************
******** Locale insensitive ctype functions ********
****************************************************/
/* FIXME: replace them by a table lookup and macros */
int
ascii_isupper (int c)
{
return c >= 'A' && c <= 'Z';
}
int
ascii_islower (int c)
{
return c >= 'a' && c <= 'z';
}
int
ascii_toupper (int c)
{
if (c >= 'a' && c <= 'z')
c &= ~0x20;
return c;
}
int
ascii_tolower (int c)
{
if (c >= 'A' && c <= 'Z')
c |= 0x20;
return c;
}
int
ascii_strcasecmp( const char *a, const char *b )
{
if (a == b)
return 0;
for (; *a && *b; a++, b++) {
if (*a != *b && ascii_toupper(*a) != ascii_toupper(*b))
break;
}
return *a == *b? 0 : (ascii_toupper (*a) - ascii_toupper (*b));
}
int
ascii_strncasecmp (const char *a, const char *b, size_t n)
{
const unsigned char *p1 = (const unsigned char *)a;
const unsigned char *p2 = (const unsigned char *)b;
unsigned char c1, c2;
if (p1 == p2 || !n )
return 0;
do
{
c1 = ascii_tolower (*p1);
c2 = ascii_tolower (*p2);
if ( !--n || c1 == '\0')
break;
++p1;
++p2;
}
while (c1 == c2);
return c1 - c2;
}
int
ascii_memcasecmp (const void *a_arg, const void *b_arg, size_t n )
{
const char *a = a_arg;
const char *b = b_arg;
if (a == b)
return 0;
for ( ; n; n--, a++, b++ )
{
if( *a != *b && ascii_toupper (*a) != ascii_toupper (*b) )
return *a == *b? 0 : (ascii_toupper (*a) - ascii_toupper (*b));
}
return 0;
}
int
ascii_strcmp( const char *a, const char *b )
{
if (a == b)
return 0;
for (; *a && *b; a++, b++) {
if (*a != *b )
break;
}
return *a == *b? 0 : (*(signed char *)a - *(signed char *)b);
}
void *
ascii_memcasemem (const void *haystack, size_t nhaystack,
const void *needle, size_t nneedle)
{
if (!nneedle)
return (void*)haystack; /* finding an empty needle is really easy */
if (nneedle <= nhaystack)
{
const char *a = haystack;
const char *b = a + nhaystack - nneedle;
for (; a <= b; a++)
{
if ( !ascii_memcasecmp (a, needle, nneedle) )
return (void *)a;
}
}
return NULL;
}
/*********************************************
********** missing string functions *********
*********************************************/
#ifndef HAVE_STPCPY
char *
stpcpy(char *a,const char *b)
{
while( *b )
*a++ = *b++;
*a = 0;
return (char*)a;
}
#endif
#ifndef HAVE_STRSEP
/* Code taken from glibc-2.2.1/sysdeps/generic/strsep.c. */
char *
strsep (char **stringp, const char *delim)
{
char *begin, *end;
begin = *stringp;
if (begin == NULL)
return NULL;
/* A frequent case is when the delimiter string contains only one
character. Here we don't need to call the expensive `strpbrk'
function and instead work using `strchr'. */
if (delim[0] == '\0' || delim[1] == '\0')
{
char ch = delim[0];
if (ch == '\0')
end = NULL;
else
{
if (*begin == ch)
end = begin;
else if (*begin == '\0')
end = NULL;
else
end = strchr (begin + 1, ch);
}
}
else
/* Find the end of the token. */
end = strpbrk (begin, delim);
if (end)
{
/* Terminate the token and set *STRINGP past NUL character. */
*end++ = '\0';
*stringp = end;
}
else
/* No more delimiters; this is the last token. */
*stringp = NULL;
return begin;
}
#endif /*HAVE_STRSEP*/
#ifndef HAVE_STRLWR
char *
strlwr(char *s)
{
char *p;
for(p=s; *p; p++ )
*p = tolower(*p);
return s;
}
#endif
#ifndef HAVE_STRCASECMP
int
strcasecmp( const char *a, const char *b )
{
for( ; *a && *b; a++, b++ ) {
if( *a != *b && toupper(*a) != toupper(*b) )
break;
}
return *(const byte*)a - *(const byte*)b;
}
#endif
/****************
* mingw32/cpd has a memicmp()
*/
#ifndef HAVE_MEMICMP
int
memicmp( const char *a, const char *b, size_t n )
{
for( ; n; n--, a++, b++ )
if( *a != *b && toupper(*(const byte*)a) != toupper(*(const byte*)b) )
return *(const byte *)a - *(const byte*)b;
return 0;
}
#endif
#ifndef HAVE_MEMRCHR
void *
memrchr (const void *buffer, int c, size_t n)
{
const unsigned char *p = buffer;
for (p += n; n ; n--)
if (*--p == c)
return (void *)p;
return NULL;
}
#endif /*HAVE_MEMRCHR*/
/* Percent-escape the string STR by replacing colons with '%3a'. If
EXTRA is not NULL all characters in EXTRA are also escaped. */
static char *
do_percent_escape (const char *str, const char *extra, int die)
{
int i, j;
char *ptr;
if (!str)
return NULL;
for (i=j=0; str[i]; i++)
if (str[i] == ':' || str[i] == '%' || (extra && strchr (extra, str[i])))
j++;
if (die)
ptr = jnlib_xmalloc (i + 2 * j + 1);
else
{
ptr = jnlib_malloc (i + 2 * j + 1);
if (!ptr)
return NULL;
}
i = 0;
while (*str)
{
if (*str == ':')
{
ptr[i++] = '%';
ptr[i++] = '3';
ptr[i++] = 'a';
}
else if (*str == '%')
{
ptr[i++] = '%';
ptr[i++] = '2';
ptr[i++] = '5';
}
else if (extra && strchr (extra, *str))
{
ptr[i++] = '%';
ptr[i++] = tohex_lower ((*str>>4)&15);
ptr[i++] = tohex_lower (*str&15);
}
else
ptr[i++] = *str;
str++;
}
ptr[i] = '\0';
return ptr;
}
/* Percent-escape the string STR by replacing colons with '%3a'. If
EXTRA is not NULL all characters in EXTRA are also escaped. */
char *
percent_escape (const char *str, const char *extra)
{
return do_percent_escape (str, extra, 1);
}
/* Same as percent_escape but return NULL instead of exiting on memory
error. */
char *
try_percent_escape (const char *str, const char *extra)
{
return do_percent_escape (str, extra, 0);
}