/src/gnupg/common/mbox-util.c

Source
/* mbox-util.c - Mail address helper functions
 * Copyright (C) 1998-2010 Free Software Foundation, Inc.
 * Copyright (C) 1998-2015 Werner Koch
 *
 * This file is part of GnuPG.
 *
 * This file is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This file is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses/>.
 */

/* NB: GPGME uses the same code to reflect our idea on how to extract
 * a mail address from a user id.
 */

#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>

#include "util.h"
#include "mbox-util.h"


/* Count how often the character C occurs in the nul-terminated
   STRING and return that number.  */
static int
string_count_chr (const char *string, int c)
{
  const char *p = string;
  int hits = 0;

  while (*p)
    {
      if (*p == c)
        hits++;
      p++;
    }
  return hits;
}

/* Count how often the character C occurs within the first LENGTH
   bytes of BUFFER.  The buffer is not required to be nul-terminated
   and embedded nul bytes do not stop the scan.  */
static int
mem_count_chr (const void *buffer, int c, size_t length)
{
  const char *bytes = buffer;
  size_t idx;
  int hits = 0;

  for (idx = 0; idx < length; idx++)
    {
      if (bytes[idx] == c)
        hits++;
    }
  return hits;
}


/* Return true if the nul-terminated STRING contains a byte in the
   range 0x01..0x20, that is an ASCII control character or a space.
   Bytes with the high bit set are never reported, and neither is
   DEL (0x7f).  */
static int
string_has_ctrl_or_space (const char *string)
{
  const char *p;

  for (p = string; *p; p++)
    {
      /* Look at the byte as unsigned so that the result does not
         depend on the signedness of plain char: everything up to
         0x20 has the high bit clear anyway.  */
      unsigned char ch = (unsigned char)*p;

      if (ch <= 0x20)
        return 1;
    }
  return 0;
}


/* Return true if the part of STRING following its first '@' sign
   contains two consecutive dots.  A string without any '@' yields
   false.  */
static int
has_dotdot_after_at (const char *string)
{
  const char *at = strchr (string, '@');

  if (at == NULL)
    return 0; /* No at-sign, thus nothing to check.  */

  return strstr (at + 1, "..") != NULL;
}


/* Return true if BUFFER contains a character which is not valid in
   an RFC-822 address.  At most LENGTH bytes are inspected; note that
   the scan also stops at the first nul byte, even if LENGTH has not
   been exhausted.

   Before the first '@' the letters, digits, '_', '-', '.' and the
   punctuation characters !#$%&'*+/=?^`{|}~ are accepted.  After an
   '@' has been seen only letters, digits, '_', '-' and '.' are
   accepted.  The '@' itself is always accepted; counting it is left
   to the caller.

   Bytes with the high bit set are skipped without any check.  This
   is on purpose: An OpenPGP user ID must be utf-8 encoded but RFC-822
   has no strict requirement.  To avoid IDNA encoding the address is
   put verbatim as utf-8 into the user ID under the assumption that
   mail programs handle IDNA at a lower level.  An utf-8 validity
   check can't be done here because keygen.c calls this function with
   the native encoding and converts to utf-8 only later.  */
int
has_invalid_email_chars (const void *buffer, size_t length)
{
  static const char common_chars[] =
    "01234567890_-.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
  static const char local_part_extra[] = "!#$%&'*+/=?^`{|}~";
  const unsigned char *bytes = buffer;
  int in_domain = 0;
  size_t idx;

  for (idx = 0; idx < length && bytes[idx]; idx++)
    {
      int ch = bytes[idx];

      if ((ch & 0x80))
        continue;  /* Non-ASCII is not our business.  */

      if (ch == '@')
        {
          in_domain = 1;
          continue;
        }

      if (strchr (common_chars, ch))
        continue;  /* Fine in the local part and in the domain.  */

      /* What is left is only acceptable in the local part and only
         if it is one of the extra punctuation characters.  */
      if (in_domain || !strchr (local_part_extra, ch))
        return 1;
    }

  return 0;
}


/* Same as is_valid_mailbox (see below) but for a buffer NAME_ARG of
   NAMELEN bytes which does not need to be nul-terminated.  Returns
   true only if all of these conditions hold:
     - the buffer is neither NULL nor empty,
     - has_invalid_email_chars does not object,
     - there is exactly one '@',
     - the '@' is neither the first nor the last byte,
     - the last byte is not a '.',
     - there are no two consecutive dots.  */
int
is_valid_mailbox_mem (const void *name_arg, size_t namelen)
{
  const char *mbox = name_arg;

  if (mbox == NULL || namelen == 0)
    return 0;  /* Nothing to look at.  */

  if (has_invalid_email_chars (mbox, namelen))
    return 0;

  if (mem_count_chr (mbox, '@', namelen) != 1)
    return 0;  /* Need exactly one at-sign.  */

  if (mbox[0] == '@')
    return 0;  /* Local part missing.  */

  if (mbox[namelen-1] == '@' || mbox[namelen-1] == '.')
    return 0;  /* Domain missing or trailing dot.  */

  if (gnupg_memstr (mbox, namelen, ".."))
    return 0;  /* Consecutive dots.  */

  return 1;
}


/* Check whether the nul-terminated string NAME represents a valid
   mailbox according to RFC822.  Returns true if so; a NULL pointer
   is never valid.  The actual work is done by is_valid_mailbox_mem.  */
int
is_valid_mailbox (const char *name)
{
  if (!name)
    return 0;

  return is_valid_mailbox_mem (name, strlen (name));
}


/* Return the mailbox (local-part@domain) from a standard user id.
 * All plain ASCII characters in the result are converted to
 * lowercase.  If SUBADDRESS is 1, '+' denoted sub-addresses are not
 * included in the result.  Caller must free the result.
 *
 * Two forms of USERID are recognized:
 *  - If USERID contains a '<', the text between the first '<' and
 *    the next '>' is taken as the mailbox and only some basic checks
 *    are applied to it.
 *  - Otherwise the entire USERID must pass is_valid_mailbox.
 *
 * Returns NULL if no valid mailbox was found (or we are out of
 * memory).  If no valid mailbox was found, which includes a NULL
 * USERID, ERRNO is set to EINVAL.  */
char *
mailbox_from_userid (const char *userid, int subaddress)
{
  const char *s, *s_end;
  size_t len;
  char *result = NULL;

  if (!userid)
    {
      /* Do not crash in strchr; is_valid_mailbox also accepts a NULL
         argument and treats it as invalid.  */
      errno = EINVAL;
      return NULL;
    }

  s = strchr (userid, '<');
  if (s)
    {
      /* Seems to be a standard user id.  */
      s++;
      s_end = strchr (s, '>');
      if (s_end && s_end > s)
        {
          len = s_end - s;
          result = xtrymalloc (len + 1);
          if (!result)
            return NULL; /* Ooops - out of core.  */
          /* The LEN bytes starting at S are known to be free of nul
             bytes because strchr found the '>' behind them.  Thus a
             plain memcpy does the job.  */
          memcpy (result, s, len);
          result[len] = 0;
          /* Apply some basic checks on the address.  We do not use
             is_valid_mailbox because those checks are too strict.  */
          if (string_count_chr (result, '@') != 1  /* Need exactly one '@.  */
              || *result == '@'           /* local-part missing.  */
              || result[len-1] == '@'     /* domain missing.  */
              || result[len-1] == '.'     /* ends with a dot.  */
              || string_has_ctrl_or_space (result)
              || has_dotdot_after_at (result))
            {
              xfree (result);
              result = NULL;
              errno = EINVAL;
            }
        }
      else
        errno = EINVAL;  /* No closing '>' or an empty "<>".  */
    }
  else if (is_valid_mailbox (userid))
    {
      /* The entire user id is a mailbox.  Return that one.  Note that
         this fallback method has some restrictions on the valid
         syntax of the mailbox.  However, those who want weird
         addresses should know about it and use the regular <...>
         syntax.  */
      result = xtrystrdup (userid);
    }
  else
    errno = EINVAL;

  if (result && subaddress == 1)
    {
      char *atsign, *plus;

      if ((atsign = strchr (result, '@')))
        {
          /* We consider a subaddress only if there is a single '+'
           * in the local part and the '+' is not the first or last
           * character.  To restrict the search to the local part we
           * temporarily cut the string at the '@'.  */
          *atsign = 0;
          if ((plus = strchr (result, '+'))
              && !strchr (plus+1, '+')
              && result != plus
              && plus[1] )
            {
              *atsign = '@';
              /* Move "@domain" including the nul over the "+sub"
                 part; the regions overlap, thus memmove.  */
              memmove (plus, atsign, strlen (atsign)+1);
            }
          else
            *atsign = '@';
        }
    }

  return result? ascii_strlwr (result): NULL;
}


/* Check whether UID is acceptable as a user id and return true if
   this is the case.  A standard user id has the form
     "Heinrich Heine <heinrichh@duesseldorf.de>"
   but note that this form is not enforced here: the only things
   rejected are a NULL pointer and the empty string.  */
int
is_valid_user_id (const char *uid)
{
  return (uid != NULL && *uid != '\0');
}


/* Returns true if STRING is a valid domain name according to the LDH
 * rule (letters, digits, hyphen); labels are separated by dots.  The
 * following is rejected:
 *  - a NULL pointer or an empty string,
 *  - any character other than ASCII letters, digits, '-' and '.',
 *  - a dot at the start of the string or two consecutive dots,
 *  - a hyphen at the begin or at the end of a label.
 * A single trailing dot is accepted.  The length limits of a label
 * and of the entire name are not checked.  */
int
is_valid_domain_name (const char *string)
{
  static char const ldh_chars[] =
    "01234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-";
  const char *s;

  if (!string)
    return 0; /* No name at all.  */

  /* Note that we do not check the length limit of a label or the
   * entire name */

  for (s=string; *s; s++)
    {
      if (*s == '.')
        {
          if (string == s)
            return 0; /* Dot at the start of the string.  */
                      /* (may also be at the end like in ".") */
          if (s[1] == '.')
            return 0; /* No - double dot.  */
        }
      else if (!strchr (ldh_chars, *s))
        return 0;   /* Not a letter, digit or hyphen.  */
      else if (*s == '-')
        {
          if (string == s)
            return 0;  /* Leading hyphen.  */
          if (s[-1] == '.')
            return 0;  /* Hyphen at begin of a label.  */
          if (s[1] == '.')
            return 0;  /* Hyphen at end of a label.  */
          if (!s[1])
            return 0;  /* Trailing hyphen.  */
        }
    }

  return !!*string;
}

Coverage Report

Created: 2026-01-09 06:48

Line	Count	Source
1		/* mbox-util.c - Mail address helper functions
2		* Copyright (C) 1998-2010 Free Software Foundation, Inc.
3		* Copyright (C) 1998-2015 Werner Koch
4		*
5		* This file is part of GnuPG.
6		*
7		* This file is free software; you can redistribute it and/or modify
8		* it under the terms of the GNU Lesser General Public License as
9		* published by the Free Software Foundation; either version 2.1 of
10		* the License, or (at your option) any later version.
11		*
12		* This file is distributed in the hope that it will be useful,
13		* but WITHOUT ANY WARRANTY; without even the implied warranty of
14		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15		* GNU General Public License for more details.
16		*
17		* You should have received a copy of the GNU Lesser General Public License
18		* along with this program; if not, see <https://www.gnu.org/licenses/>.
19		*/
20
21		/* NB: GPGME uses the same code to reflect our idea on how to extract
22		* a mail address from a user id.
23		*/
24
25		#include <config.h>
26		#include <stdio.h>
27		#include <stdlib.h>
28		#include <string.h>
29		#include <unistd.h>
30		#include <errno.h>
31
32		#include "util.h"
33		#include "mbox-util.h"
34
35
36		static int
37		string_count_chr (const char *string, int c)
38	817	{
39	817	int count;
40
41	13.8k	for (count=0; *string; string++ )
42	12.9k	if ( *string == c )
43	883	count++;
44	817	return count;
45	817	}
46
47		static int
48		mem_count_chr (const void *buffer, int c, size_t length)
49	1.13k	{
50	1.13k	const char *s = buffer;
51	1.13k	int count;
52
53	7.57k	for (count=0; length; length--, s++)
54	6.43k	if (*s == c)
55	1.40k	count++;
56	1.13k	return count;
57	1.13k	}
58
59
60		static int
61		string_has_ctrl_or_space (const char *string)
62	484	{
63	9.93k	for (; *string; string++ )
64	9.51k	if (!(*string & 0x80) && *string <= 0x20)
65	58	return 1;
66	426	return 0;
67	484	}
68
69
70		/* Return true if STRING has two consecutive '.' after an '@'
71		sign. */
72		static int
73		has_dotdot_after_at (const char *string)
74	426	{
75	426	string = strchr (string, '@');
76	426	if (!string)
77	0	return 0; /* No at-sign. */
78	426	string++;
79	426	return !!strstr (string, "..");
80	426	}
81
82
83		/* Check whether BUFFER has characters not valid in an RFC-822
84		address. LENGTH gives the length of BUFFER.
85
86		To cope with OpenPGP we ignore non-ascii characters so that for
87		example umlauts are legal in an email address. An OpenPGP user ID
88		must be utf-8 encoded but there is no strict requirement for
89		RFC-822. Thus to avoid IDNA encoding we put the address verbatim
90		as utf-8 into the user ID under the assumption that mail programs
91		handle IDNA at a lower level and take OpenPGP user IDs as utf-8.
92		Note that we can't do an utf-8 encoding checking here because in
93		keygen.c this function is called with the native encoding and
94		native to utf-8 encoding is only done later. */
95		int
96		has_invalid_email_chars (const void *buffer, size_t length)
97	1.99k	{
98	1.99k	const unsigned char *s = buffer;
99	1.99k	int at_seen=0;
100	1.99k	const char *valid_chars=
101	1.99k	"01234567890_-.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
102
103	10.8k	for ( ; length && *s; length--, s++ )
104	9.68k	{
105	9.68k	if ((*s & 0x80))
106	661	continue; /* We only care about ASCII. */
107	9.02k	if (*s == '@')
108	2.13k	at_seen=1;
109	6.89k	else if (!at_seen && !(strchr (valid_chars, *s)
110	594	\|\| strchr ("!#$%&'*+/=?^`{\|}~", *s)))
111	130	return 1;
112	6.76k	else if (at_seen && !strchr (valid_chars, *s))
113	728	return 1;
114	9.02k	}
115	1.13k	return 0;
116	1.99k	}
117
118
119		/* Same as is_valid_mailbox (see below) but operates on non-nul
120		terminated buffer. */
121		int
122		is_valid_mailbox_mem (const void *name_arg, size_t namelen)
123	1.99k	{
124	1.99k	const char *name = name_arg;
125
126	1.99k	return !( !name
127	1.99k	\|\| !namelen
128	1.99k	\|\| has_invalid_email_chars (name, namelen)
129	1.13k	\|\| mem_count_chr (name, '@', namelen) != 1
130	831	\|\| *name == '@'
131	778	\|\| name[namelen-1] == '@'
132	428	\|\| name[namelen-1] == '.'
133	391	\|\| gnupg_memstr (name, namelen, ".."));
134	1.99k	}
135
136
137		/* Check whether NAME represents a valid mailbox according to
138		RFC822. Returns true if so. */
139		int
140		is_valid_mailbox (const char *name)
141	1.99k	{
142	1.99k	return name? is_valid_mailbox_mem (name, strlen (name)) : 0;
143	1.99k	}
144
145
146		/* Return the mailbox (local-part@domain) form a standard user id.
147		* All plain ASCII characters in the result are converted to
148		* lowercase. If SUBADDRESS is 1, '+' denoted sub-addresses are not
149		* included in the result. Caller must free the result. Returns NULL
150		* if no valid mailbox was found (or we are out of memory). */
151		char *
152		mailbox_from_userid (const char *userid, int subaddress)
153	3.08k	{
154	3.08k	const char *s, *s_end;
155	3.08k	size_t len;
156	3.08k	char *result = NULL;
157
158	3.08k	s = strchr (userid, '<');
159	3.08k	if (s)
160	1.08k	{
161		/* Seems to be a standard user id. */
162	1.08k	s++;
163	1.08k	s_end = strchr (s, '>');
164	1.08k	if (s_end && s_end > s)
165	817	{
166	817	len = s_end - s;
167	817	result = xtrymalloc (len + 1);
168	817	if (!result)
169	0	return NULL; /* Ooops - out of core. */
170	817	strncpy (result, s, len);
171	817	result[len] = 0;
172		/* Apply some basic checks on the address. We do not use
173		is_valid_mailbox because those checks are too strict. */
174	817	if (string_count_chr (result, '@') != 1 /* Need exactly one '@. */
175	725	\|\| *result == '@' /* local-part missing. */
176	673	\|\| result[len-1] == '@' /* domain missing. */
177	604	\|\| result[len-1] == '.' /* ends with a dot. */
178	484	\|\| string_has_ctrl_or_space (result)
179	426	\|\| has_dotdot_after_at (result))
180	565	{
181	565	xfree (result);
182	565	result = NULL;
183	565	errno = EINVAL;
184	565	}
185	817	}
186	270	else
187	1.08k	errno = EINVAL;
188	1.08k	}
189	1.99k	else if (is_valid_mailbox (userid))
190	33	{
191		/* The entire user id is a mailbox. Return that one. Note that
192		this fallback method has some restrictions on the valid
193		syntax of the mailbox. However, those who want weird
194		addresses should know about it and use the regular <...>
195		syntax. */
196	33	result = xtrystrdup (userid);
197	33	}
198	1.96k	else
199	1.99k	errno = EINVAL;
200
201	3.08k	if (result && subaddress == 1)
202	0	{
203	0	char *atsign, *plus;
204
205	0	if ((atsign = strchr (result, '@')))
206	0	{
207		/* We consider a subaddress only if there is a single '+'
208		* in the local part and the '+' is not the first or last
209		* character. */
210	0	*atsign = 0;
211	0	if ((plus = strchr (result, '+'))
212	0	&& !strchr (plus+1, '+')
213	0	&& result != plus
214	0	&& plus[1] )
215	0	{
216	0	*atsign = '@';
217	0	memmove (plus, atsign, strlen (atsign)+1);
218	0	}
219	0	else
220	0	*atsign = '@';
221	0	}
222	0	}
223
224	3.08k	return result? ascii_strlwr (result): NULL;
225	3.08k	}
226
227
228		/* Check whether UID is a valid standard user id of the form
229		"Heinrich Heine <heinrichh@duesseldorf.de>"
230		and return true if this is the case. */
231		int
232		is_valid_user_id (const char *uid)
233	0	{
234	0	if (!uid \|\| !*uid)
235	0	return 0;
236
237	0	return 1;
238	0	}
239
240
241		/* Returns true if STRING is a valid domain name according to the LDH
242		* rule. */
243		int
244		is_valid_domain_name (const char *string)
245	0	{
246	0	static char const ldh_chars[] =
247	0	"01234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-";
248	0	const char *s;
249
250		/* Note that we do not check the length limit of a label or the
251		* entire name */
252
253	0	for (s=string; *s; s++)
254	0	if (*s == '.')
255	0	{
256	0	if (string == s)
257	0	return 0; /* Dot at the start of the string. */
258		/* (may also be at the end like in ".") */
259	0	if (s[1] == '.')
260	0	return 0; /* No - double dot. */
261	0	}
262	0	else if (!strchr (ldh_chars, *s))
263	0	return 0;
264	0	else if (*s == '-')
265	0	{
266	0	if (string == s)
267	0	return 0; /* Leading hyphen. */
268	0	if (s[-1] == '.')
269	0	return 0; /* Hyphen at begin of a label. */
270	0	if (s[1] == '.')
271	0	return 0; /* Hyphen at start of a label. */
272	0	if (!s[1])
273	0	return 0; /* Trailing hyphen. */
274	0	}
275
276	0	return !!*string;
277	0	}