summaryrefslogtreecommitdiffstats
path: root/common/mbox-util.c
blob: a9086a3f5a9d5aa784c667acf318a66dadb88fdf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
/* mbox-util.c - Mail address helper functions
 * Copyright (C) 1998-2010 Free Software Foundation, Inc.
 * Copyright (C) 1998-2015 Werner Koch
 *
 * This file is part of GnuPG.
 *
 * This file is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This file is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses/>.
 */

/* NB: GPGME uses the same code to reflect our idea on how to extract
 * a mail address from a user id.
 */

#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>

#include "util.h"
#include "mbox-util.h"


/* Count how often the character C occurs in the NUL-terminated
   STRING and return that number.  The terminating NUL itself is
   never counted.  */
static int
string_count_chr (const char *string, int c)
{
  const char *p = string;
  int hits = 0;

  while (*p)
    {
      if (*p == c)
        hits++;
      p++;
    }
  return hits;
}

/* Count how often the character C occurs within the first LENGTH
   bytes of BUFFER and return that number.  BUFFER need not be NUL
   terminated; NUL bytes inside the area do not stop the scan.  */
static int
mem_count_chr (const void *buffer, int c, size_t length)
{
  const char *bytes = buffer;
  size_t idx;
  int hits = 0;

  for (idx = 0; idx < length; idx++)
    {
      if (bytes[idx] == c)
        hits++;
    }
  return hits;
}


/* Locate the NUL-terminated string SUB inside the memory area BUFFER
   of BUFLEN bytes.  This is a case-sensitive version of our memistr.
   I wonder why no standard function memstr exists but I better do
   not use the name memstr to avoid future conflicts.

   BUFFER need not be NUL terminated and NUL bytes inside of it are
   treated like any other byte.  Returns a pointer to the first
   occurrence of SUB within BUFFER or NULL if there is none.  An
   empty SUB never matches.  */
static const char *
my_memstr (const void *buffer, size_t buflen, const char *sub)
{
  const unsigned char *buf = buffer;
  size_t sublen = strlen (sub);

  if (!sublen)
    return NULL;

  /* Only positions which leave room for all of SUB are candidates.
     Comparing exactly SUBLEN bytes makes sure that we neither read
     beyond the end of BUFFER nor beyond the terminator of SUB, even
     if BUFFER has a NUL right after a match.  */
  for (; buflen >= sublen; buf++, buflen--)
    {
      if (*buf == (unsigned char)*sub && !memcmp (buf, sub, sublen))
        return (const char *)buf;
    }
  return NULL;
}



/* Return true if the NUL-terminated STRING contains a space or any
   other ASCII character with a code below that of the space.  Bytes
   with the high bit set are never reported.  */
static int
string_has_ctrl_or_space (const char *string)
{
  const unsigned char *p = (const unsigned char *)string;

  /* Taken as unsigned, every byte with the high bit set is larger
     than 0x20, thus a single comparison is sufficient.  */
  while (*p)
    {
      if (*p <= 0x20)
        return 1;
      p++;
    }
  return 0;
}


/* Return 1 if the part of STRING which follows its first '@' sign
   contains two consecutive dots.  Return 0 otherwise, which includes
   the case that STRING has no '@' at all.  */
static int
has_dotdot_after_at (const char *string)
{
  const char *at = strchr (string, '@');

  if (at == NULL)
    return 0;
  return strstr (at + 1, "..") != NULL;
}


/* Return 1 if BUFFER contains a character which is not valid in an
   RFC-822 address and 0 otherwise.  At most LENGTH bytes are
   inspected and the scan also ends at the first NUL byte.

   Before an '@' has been seen, letters, digits, '_', '-', '.' and the
   characters "!#$%&'*+/=?^`{|}~" are accepted.  Once an '@' has been
   seen only letters, digits, '_', '-' and '.' are accepted.  The '@'
   itself is always accepted; counting it is left to the caller.

   To cope with OpenPGP all non-ASCII bytes are ignored so that for
   example umlauts are legal in an email address.  An OpenPGP user ID
   must be utf-8 encoded but RFC-822 has no strict requirement.  Thus,
   to avoid IDNA encoding, the address is put verbatim as utf-8 into
   the user ID under the assumption that mail programs handle IDNA at
   a lower level and take OpenPGP user IDs as utf-8.  A check of the
   utf-8 encoding is not possible here because keygen.c calls this
   function with the native encoding; the conversion from native to
   utf-8 is done only later.  */
int
has_invalid_email_chars (const void *buffer, size_t length)
{
  static const char valid_chars[] =
    "01234567890_-.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
  static const char local_part_extra[] = "!#$%&'*+/=?^`{|}~";
  const unsigned char *bytes = buffer;
  size_t idx;
  int in_domain = 0;

  for (idx = 0; idx < length && bytes[idx]; idx++)
    {
      unsigned char c = bytes[idx];

      if ((c & 0x80))
        continue; /* We only care about ASCII.  */
      if (c == '@')
        {
          in_domain = 1;
          continue;
        }
      if (strchr (valid_chars, c))
        continue; /* Valid on both sides of the '@'.  */
      if (in_domain)
        return 1; /* The domain part allows nothing else.  */
      if (!strchr (local_part_extra, c))
        return 1; /* Not allowed in the local part either.  */
    }
  return 0;
}


/* Same as is_valid_mailbox (see below) but operates on a buffer of
   NAMELEN bytes which need not be NUL terminated.  Returns 1 if the
   buffer passes all checks and 0 otherwise; a NULL NAME_ARG or a
   NAMELEN of zero yields 0.  */
int
is_valid_mailbox_mem (const void *name_arg, size_t namelen)
{
  const char *name = name_arg;

  if (!name || !namelen)
    return 0;  /* Nothing to check.  */
  if (has_invalid_email_chars (name, namelen))
    return 0;
  if (mem_count_chr (name, '@', namelen) != 1)
    return 0;  /* Need exactly one '@'.  */
  if (name[0] == '@')
    return 0;  /* Local-part missing.  */
  if (name[namelen-1] == '@')
    return 0;  /* Domain missing.  */
  if (name[namelen-1] == '.')
    return 0;  /* Ends with a dot.  */
  if (my_memstr (name, namelen, ".."))
    return 0;  /* Two consecutive dots.  */
  return 1;
}


/* Check whether the NUL-terminated string NAME represents a valid
   mailbox according to RFC822.  Returns true if so.  A NULL NAME is
   not valid.  The actual work is done by is_valid_mailbox_mem.  */
int
is_valid_mailbox (const char *name)
{
  if (!name)
    return 0;
  return is_valid_mailbox_mem (name, strlen (name));
}


/* Return the mailbox (local-part@domain) from a standard user id.
 * All plain ASCII characters in the result are converted to
 * lowercase.  If SUBADDRESS is 1, '+' denoted sub-addresses are not
 * included in the result.  Caller must free the result.  Returns NULL
 * if no valid mailbox was found (or we are out of memory).
 *
 * If USERID has a '<', the text up to the next '>' is taken as the
 * mailbox and only some basic checks are applied to it.  Without a
 * '<' the entire USERID is used, provided it passes the stricter
 * is_valid_mailbox.  If no valid mailbox was found, which includes a
 * NULL USERID, ERRNO is set to EINVAL.  */
char *
mailbox_from_userid (const char *userid, int subaddress)
{
  const char *s, *s_end;
  size_t len;
  char *result = NULL;

  if (!userid)
    {
      /* Without this check the strchr below would dereference a
         NULL pointer.  */
      errno = EINVAL;
      return NULL;
    }

  s = strchr (userid, '<');
  if (s)
    {
      /* Seems to be a standard user id.  */
      s++;
      s_end = strchr (s, '>');
      if (s_end && s_end > s)
        {
          /* LEN is at least 1 here, thus RESULT[LEN-1] below is a
             valid access.  */
          len = s_end - s;
          result = xtrymalloc (len + 1);
          if (!result)
            return NULL; /* Ooops - out of core.  */
          /* There is no NUL before S_END, thus exactly LEN bytes
             can be copied.  */
          memcpy (result, s, len);
          result[len] = 0;
          /* Apply some basic checks on the address.  We do not use
             is_valid_mailbox because those checks are too strict.  */
          if (string_count_chr (result, '@') != 1  /* Need exactly one '@'.  */
              || *result == '@'           /* local-part missing.  */
              || result[len-1] == '@'     /* domain missing.  */
              || result[len-1] == '.'     /* ends with a dot.  */
              || string_has_ctrl_or_space (result)
              || has_dotdot_after_at (result))
            {
              xfree (result);
              result = NULL;
              errno = EINVAL;
            }
        }
      else
        errno = EINVAL;  /* No closing '>' or nothing between "<>".  */
    }
  else if (is_valid_mailbox (userid))
    {
      /* The entire user id is a mailbox.  Return that one.  Note that
         this fallback method has some restrictions on the valid
         syntax of the mailbox.  However, those who want weird
         addresses should know about it and use the regular <...>
         syntax.  */
      result = xtrystrdup (userid);
    }
  else
    errno = EINVAL;

  if (result && subaddress == 1)
    {
      char *atsign, *plus;

      if ((atsign = strchr (result, '@')))
        {
          /* We consider a subaddress only if there is a single '+'
           * in the local part and the '+' is not the first or last
           * character.  The '@' is temporarily replaced by a NUL so
           * that the string functions see only the local part.  */
          *atsign = 0;
          if ((plus = strchr (result, '+'))
              && !strchr (plus+1, '+')
              && result != plus
              && plus[1] )
            {
              *atsign = '@';
              /* Move "@domain" including its NUL over the "+...".  */
              memmove (plus, atsign, strlen (atsign)+1);
            }
          else
            *atsign = '@';
        }
    }

  return result? ascii_strlwr (result): NULL;
}


/* Check whether UID is acceptable as a user id, for example
     "Heinrich Heine <heinrichh@duesseldorf.de>"
   and return true if this is the case.  Note that the only thing
   checked is that UID is neither NULL nor the empty string; the
   structure of the user id is not examined.  */
int
is_valid_user_id (const char *uid)
{
  return (uid != NULL && *uid != '\0') ? 1 : 0;
}


/* Returns true if STRING is a valid domain name according to the LDH
 * rule (letters, digits, hyphen): only ASCII letters, digits, '-'
 * and '.' are allowed, STRING must not be empty or start with a dot,
 * there must be no two consecutive dots, and no label may start or
 * end with a hyphen.  A single trailing dot is accepted.  A NULL
 * STRING is not valid.  */
int
is_valid_domain_name (const char *string)
{
  static char const ldh_chars[] =
    "01234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-";
  const char *s;

  if (!string)
    return 0;  /* Be as tolerant as the other validators here.  */

  /* Note that we do not check the length limit of a label or the
   * entire name */

  for (s=string; *s; s++)
    if (*s == '.')
      {
        if (string == s)
          return 0; /* Dot at the start of the string.  */
                    /* (this also rejects a lone ".") */
        if (s[1] == '.')
          return 0; /* No - double dot.  */
      }
    else if (!strchr (ldh_chars, *s))
      return 0;
    else if (*s == '-')
      {
        if (string == s)
          return 0;  /* Leading hyphen.  */
        if (s[-1] == '.')
          return 0;  /* Hyphen at start of a label.  */
        if (s[1] == '.')
          return 0;  /* Hyphen at end of a label.  */
        if (!s[1])
          return 0;  /* Trailing hyphen.  */
      }

  return !!*string;
}