/src/dovecot/src/lib-mail/rfc822-parser.c

Source (jump to first uncovered line)
/* Copyright (c) 2005-2018 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "str.h"
#include "punycode.h"
#include "strescape.h"
#include "rfc822-parser.h"

/*
   atext        =       ALPHA / DIGIT / ; Any character except controls,
      "!" / "#" /     ;  SP, and specials.
      "$" / "%" /     ;  Used for atoms
      "&" / "'" /
      "*" / "+" /
      "-" / "/" /
      "=" / "?" /
      "^" / "_" /
      "`" / "{" /
      "|" / "}" /
      "~"

  MIME:

  token := 1*<any (US-ASCII) CHAR except SPACE, CTLs,
        or tspecials>
  tspecials :=  "(" / ")" / "<" / ">" / "@" /
    "," / ";" / ":" / "\" / <">
    "/" / "[" / "]" / "?" / "="

  So token is same as dot-atom, except stops also at '/', '?' and '='.
*/

/* Per-byte classification table, indexed by the byte value (0..255).

   Values used in the table:
     0 - not usable in an atom: control characters (0-31), SP, DEL (127)
         and the specials  ( ) < > @ , ; : " . [ ] and backslash
     1 - atext punctuation:  ! # $ % & ' * + - ^ _ ` { | } ~
     2 - ASCII letters and digits; every byte >= 128 is also marked 2
     4 - '/', '=' and '?': these are atext, but they are MIME tspecials,
         so they must not be part of a MIME token

   The values are distinct bits, presumably so that the IS_ATEXT*() macros
   in rfc822-parser.h can test several classes with a single mask - verify
   against the header. */
unsigned char rfc822_atext_chars[256] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0-15 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16-31 */
  0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 4, /* 32-47 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 4, 0, 4, /* 48-63 */
  0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 64-79 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 1, 1, /* 80-95 */
  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 96-111 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 0, /* 112-127 */

  /* 128-255: all marked like alphanumerics */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};

/* Set up ctx for parsing the `size` bytes that begin at `data`.

   ctx->data becomes the read cursor and ctx->end points one past the last
   input byte. `last_comment` is stored as-is; it may be NULL, in which case
   rfc822_skip_comment() does not record comment text. */
void rfc822_parser_init(struct rfc822_parser_context *ctx,
      const unsigned char *data, size_t size,
      string_t *last_comment)
{
  /* reset the whole context before filling in the fields we know about,
     so that fields not assigned here do not keep stale values */
  i_zero(ctx);

  ctx->last_comment = last_comment;
  ctx->end = data + size;
  ctx->data = data;
}

/* Skip an RFC 822 comment. ctx->data must point at the opening '('.

   Nested comments are followed by counting parentheses. A backslash makes
   the following byte literal (so "\)" does not close the comment), except
   when that byte is CR, LF or NUL, which are then handled normally.

   If ctx->last_comment is non-NULL it is emptied first and then receives
   the comment text: the outermost parentheses are dropped, (CR)LF line
   breaks are removed, the backslash of a quoted-pair is removed, and NUL
   bytes are replaced by ctx->nul_replacement_str when that is set.

   Returns 1 if input remains after the closing ')', 0 if the comment ended
   exactly at the end of input, and -1 if the comment is unterminated
   (missing ')' or input ending right after a backslash). */
int rfc822_skip_comment(struct rfc822_parser_context *ctx)
{
  const unsigned char *start;
  size_t len;
  int level = 1;

  /* Check the bounds before dereferencing, like the other entry points
     in this file do; otherwise the assert below would itself read past
     the end of the input. */
  i_assert(ctx->data < ctx->end);
  i_assert(*ctx->data == '(');

  if (ctx->last_comment != NULL)
    str_truncate(ctx->last_comment, 0);

  /* `start` always marks the beginning of the text that has not yet been
     copied into last_comment */
  start = ++ctx->data;
  for (; ctx->data < ctx->end; ctx->data++) {
    switch (*ctx->data) {
    case '\0':
      if (ctx->last_comment != NULL &&
          ctx->nul_replacement_str != NULL) {
        /* flush the pending text and substitute the NUL */
        str_append_data(ctx->last_comment, start,
            ctx->data - start);
        str_append(ctx->last_comment,
             ctx->nul_replacement_str);
        start = ctx->data + 1;
      }
      break;
    case '(':
      level++;
      break;
    case ')':
      if (--level == 0) {
        if (ctx->last_comment != NULL) {
          str_append_data(ctx->last_comment, start,
              ctx->data - start);
        }
        ctx->data++;
        return ctx->data < ctx->end ? 1 : 0;
      }
      break;
    case '\n':
      /* folding whitespace, remove the (CR)LF */
      if (ctx->last_comment == NULL)
        break;
      len = ctx->data - start;
      if (len > 0 && start[len-1] == '\r')
        len--;
      str_append_data(ctx->last_comment, start, len);
      start = ctx->data + 1;
      break;
    case '\\':
      ctx->data++;
      if (ctx->data >= ctx->end)
        return -1;

      if (*ctx->data == '\r' || *ctx->data == '\n' ||
          *ctx->data == '\0') {
        /* quoted-pair doesn't allow CR/LF/NUL.
           They are part of the obs-qp though, so don't
           return them as error. Step back so the next
           iteration handles that byte normally. */
        ctx->data--;
        break;
      }
      if (ctx->last_comment != NULL) {
        /* copy everything before the backslash */
        str_append_data(ctx->last_comment, start,
            ctx->data - start - 1);
      }
      /* the escaped byte starts the next pending segment; the
         loop increment steps over it uninterpreted */
      start = ctx->data;
      break;
    }
  }

  /* missing ')' */
  return -1;
}

/* Advance ctx->data past any run of linear whitespace (SP, TAB, CR, LF)
   and comments.

   Returns 1 if input remains at the new position, 0 if the end of input
   was reached, and -1 if a comment could not be skipped. */
int rfc822_skip_lwsp(struct rfc822_parser_context *ctx)
{
  while (ctx->data < ctx->end) {
    const unsigned char ch = *ctx->data;

    if (ch == '(') {
      /* the comment skipper moves ctx->data itself */
      if (rfc822_skip_comment(ctx) < 0)
        return -1;
    } else if (ch == ' ' || ch == '\t' ||
         ch == '\r' || ch == '\n') {
      ctx->data++;
    } else {
      /* first byte that is neither whitespace nor a comment */
      break;
    }
  }
  return ctx->data < ctx->end ? 1 : 0;
}

/* Parse one atom at ctx->data and append it to str.

     atom            = [CFWS] 1*atext [CFWS]
     atext           =
       ; Any character except controls, SP, and specials.

   Leading CFWS is not skipped here; trailing whitespace/comments are.

   Returns -1 if there is no atext byte at the current position (nothing
   is appended), 0 if the atom ran up to the end of input, otherwise the
   result of rfc822_skip_lwsp() for whatever follows the atom. */
int rfc822_parse_atom(struct rfc822_parser_context *ctx, string_t *str)
{
  const unsigned char *atom_begin = ctx->data;

  if (ctx->data >= ctx->end || !IS_ATEXT(*ctx->data))
    return -1;

  /* the first byte is known to be atext; consume the rest of the run */
  ctx->data++;
  while (ctx->data < ctx->end && IS_ATEXT(*ctx->data))
    ctx->data++;

  str_append_data(str, atom_begin, ctx->data - atom_begin);
  if (ctx->data >= ctx->end)
    return 0;
  return rfc822_skip_lwsp(ctx);
}

/* Parse a dot-atom at ctx->data and append it to str.

     dot-atom        = [CFWS] dot-atom-text [CFWS]
     dot-atom-text   = 1*atext *("." 1*atext)

     atext           =
       ; Any character except controls, SP, and specials.

   For RFC-822 compatibility LWSP is allowed around '.'; that whitespace
   is skipped and not copied to str.

   Returns 1 if the dot-atom is followed by more input, 0 if it ended at
   the end of input, and -1 on error: no atext at the start, an empty
   element after a '.', or input ending right after a '.'. A failure
   while skipping whitespace after an element is returned as-is. Text
   appended to str before an error is detected is left in place. */
int rfc822_parse_dot_atom(struct rfc822_parser_context *ctx, string_t *str)
{
  const unsigned char *elem_begin;
  int ret;

  if (ctx->data >= ctx->end || !IS_ATEXT(*ctx->data))
    return -1;

  elem_begin = ctx->data++;
  for (;;) {
    /* consume the rest of the current atext run */
    while (ctx->data < ctx->end && IS_ATEXT(*ctx->data))
      ctx->data++;
    if (ctx->data >= ctx->end)
      break;

    /* a non-atext byte ends the element; it must not be empty */
    if (elem_begin == ctx->data)
      return -1;
    str_append_data(str, elem_begin, ctx->data - elem_begin);

    ret = rfc822_skip_lwsp(ctx);
    if (ret <= 0)
      return ret;

    if (*ctx->data != '.')
      return 1;

    ctx->data++;
    str_append_c(str, '.');

    /* something has to follow the dot */
    if (rfc822_skip_lwsp(ctx) <= 0)
      return -1;
    elem_begin = ctx->data;
  }

  /* input ended inside an element, which has at least one byte */
  i_assert(elem_begin != ctx->data);
  str_append_data(str, elem_begin, ctx->data - elem_begin);
  return 0;
}

/* Parse a MIME token at ctx->data and append it to str.

   The token is the longest run of bytes that are either accepted by
   IS_ATEXT_NON_TSPECIAL() or are '.'. The run may be empty, in which case
   nothing is appended; callers that need a non-empty token have to check
   the length of str themselves.

   Returns 0 if the run reached the end of input, otherwise the result of
   rfc822_skip_lwsp() for whatever follows the token. */
int rfc822_parse_mime_token(struct rfc822_parser_context *ctx, string_t *str)
{
  const unsigned char *token_begin = ctx->data;

  while (ctx->data < ctx->end &&
         (IS_ATEXT_NON_TSPECIAL(*ctx->data) || *ctx->data == '.'))
    ctx->data++;

  str_append_data(str, token_begin, ctx->data - token_begin);
  if (ctx->data >= ctx->end)
    return 0;
  return rfc822_skip_lwsp(ctx);
}

/* Parse a quoted-string and append its unquoted content to str.
   ctx->data must point at the opening '"'.

   While copying:
     - the surrounding quotes are dropped
     - (CR)LF line breaks are removed
     - the backslash of a quoted-pair is removed and the byte after it is
       copied literally; a backslash followed by CR, LF or NUL is kept and
       the following byte is then handled normally
     - NUL bytes are replaced by ctx->nul_replacement_str when it is set

   Returns the result of rfc822_skip_lwsp() after the closing quote, or -1
   if the closing quote is missing or the input ends right after a
   backslash. */
int rfc822_parse_quoted_string(struct rfc822_parser_context *ctx, string_t *str)
{
  const unsigned char *seg;
  size_t seg_len;

  i_assert(ctx->data < ctx->end);
  i_assert(*ctx->data == '"');
  ctx->data++;

  /* `seg` is the start of the text not yet copied to str */
  seg = ctx->data;
  while (ctx->data < ctx->end) {
    const unsigned char ch = *ctx->data;

    if (ch == '"') {
      str_append_data(str, seg, ctx->data - seg);
      ctx->data++;
      return rfc822_skip_lwsp(ctx);
    } else if (ch == '\\') {
      ctx->data++;
      if (ctx->data >= ctx->end)
        return -1;

      if (*ctx->data == '\r' || *ctx->data == '\n' ||
          *ctx->data == '\0') {
        /* quoted-pair doesn't allow CR/LF/NUL.
           They are part of the obs-qp though, so don't
           return them as error. Step back so the byte
           is looked at again on the next round. */
        ctx->data--;
      } else {
        /* flush up to, but excluding, the backslash; the
           escaped byte begins the next segment */
        str_append_data(str, seg, ctx->data - seg - 1);
        seg = ctx->data;
      }
    } else if (ch == '\n') {
      /* folding whitespace, remove the (CR)LF */
      seg_len = ctx->data - seg;
      if (seg_len > 0 && seg[seg_len-1] == '\r')
        seg_len--;
      str_append_data(str, seg, seg_len);
      seg = ctx->data + 1;
    } else if (ch == '\0') {
      if (ctx->nul_replacement_str != NULL) {
        str_append_data(str, seg, ctx->data - seg);
        str_append(str, ctx->nul_replacement_str);
        seg = ctx->data + 1;
      }
    }
    ctx->data++;
  }

  /* missing '"' */
  return -1;
}

/* Parse a run of atext and '.' bytes at ctx->data and append it to str.

     atom            = [CFWS] 1*atext [CFWS]
     atext           =
       ; Any character except controls, SP, and specials.

   The difference between this function and rfc822_parse_dot_atom()
   is that this doesn't just silently skip over all the whitespace:
   dots are copied as ordinary bytes and the run stops at the first byte
   that is neither atext nor '.'. The run may be empty.

   Returns 0 if the run reached the end of input, otherwise the result of
   rfc822_skip_lwsp() for whatever follows. */
static int
rfc822_parse_atom_or_dot(struct rfc822_parser_context *ctx, string_t *str)
{
  const unsigned char *run_begin = ctx->data;

  while (ctx->data < ctx->end &&
         (IS_ATEXT(*ctx->data) || *ctx->data == '.'))
    ctx->data++;

  str_append_data(str, run_begin, ctx->data - run_begin);
  if (ctx->data >= ctx->end)
    return 0;
  return rfc822_skip_lwsp(ctx);
}

/* Parse a phrase at ctx->data and append it to str.

     phrase     = 1*word / obs-phrase
     word       = atom / quoted-string
     obs-phrase = word *(word / "." / CFWS)

   Words are appended to str separated by a single space; quoted-strings
   are appended unquoted.

   Returns 0 if there is no input at all or the input ended while parsing
   a word, -1 if the phrase begins with '.' or a word failed to parse, and
   otherwise the result of rfc822_skip_lwsp() after the last word. */
int rfc822_parse_phrase(struct rfc822_parser_context *ctx, string_t *str)
{
  int ret;

  if (ctx->data >= ctx->end)
    return 0;
  if (*ctx->data == '.')
    return -1;

  for (;;) {
    ret = *ctx->data == '"' ?
      rfc822_parse_quoted_string(ctx, str) :
      rfc822_parse_atom_or_dot(ctx, str);
    if (ret <= 0)
      return ret;

    /* ret > 0 means there is still input at ctx->data; continue only
       if it can start another word */
    if (*ctx->data != '"' && *ctx->data != '.' &&
        !IS_ATEXT(*ctx->data))
      break;
    str_append_c(str, ' ');
  }
  return rfc822_skip_lwsp(ctx);
}

/* Parse a domain-literal and append it to str. ctx->data must point at
   the opening '['.

     domain-literal  = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
     dcontent        = dtext / quoted-pair
     dtext           = NO-WS-CTL /     ; Non white space controls
           %d33-90 /       ; The rest of the US-ASCII
           %d94-126        ;  characters not including "[",
               ;  "]", or "\"

   The brackets are part of the output, and so are the backslashes of
   quoted-pairs. (CR)LF line breaks are removed and NUL bytes are replaced
   by ctx->nul_replacement_str when it is set.

   Returns the result of rfc822_skip_lwsp() after the closing ']', or -1
   if a nested '[' is found, the closing ']' is missing, or the input ends
   right after a backslash. */
static int
rfc822_parse_domain_literal(struct rfc822_parser_context *ctx, string_t *str)
{
  const unsigned char *seg;
  size_t seg_len;

  i_assert(ctx->data < ctx->end);
  i_assert(*ctx->data == '[');

  /* `seg` is the start of the text not yet copied to str; it begins at
     the '[' so that the bracket is included in the output */
  seg = ctx->data++;
  while (ctx->data < ctx->end) {
    const unsigned char ch = *ctx->data;

    if (ch == '[') {
      /* not allowed */
      return -1;
    } else if (ch == ']') {
      /* +1 so that the closing bracket is copied as well */
      str_append_data(str, seg, ctx->data - seg + 1);
      ctx->data++;
      return rfc822_skip_lwsp(ctx);
    } else if (ch == '\\') {
      /* note: the '\' is preserved in the output */
      ctx->data++;
      if (ctx->data >= ctx->end)
        return -1;

      if (*ctx->data == '\r' || *ctx->data == '\n' ||
          *ctx->data == '\0') {
        /* quoted-pair doesn't allow CR/LF/NUL.
           They are part of the obs-qp though, so don't
           return them as error. Flush everything up to
           and including the backslash, then step back so
           the byte is handled normally on the next
           round. */
        str_append_data(str, seg, ctx->data - seg);
        seg = ctx->data;
        ctx->data--;
      }
      /* otherwise the escaped byte is stepped over below
         without being interpreted */
    } else if (ch == '\n') {
      /* folding whitespace, remove the (CR)LF */
      seg_len = ctx->data - seg;
      if (seg_len > 0 && seg[seg_len-1] == '\r')
        seg_len--;
      str_append_data(str, seg, seg_len);
      seg = ctx->data + 1;
    } else if (ch == '\0') {
      if (ctx->nul_replacement_str != NULL) {
        str_append_data(str, seg, ctx->data - seg);
        str_append(str, ctx->nul_replacement_str);
        seg = ctx->data + 1;
      }
    }
    ctx->data++;
  }

  /* missing ']' */
  return -1;
}

/* Decode the punycode labels of a dot-separated name.

   `input` is split at '.' characters within its first `len` bytes. Each
   label that begins with the (case-sensitive) prefix "xn--" is passed,
   without the prefix, to punycode_decode(); on success the decoded text is
   appended to `result`. Labels without the prefix, and labels that fail to
   decode, are appended unchanged. The separating dots are preserved,
   including a trailing one.

   Only input[0..len-1] is ever read, so the input does not have to be
   NUL-terminated. Decoding goes through a scratch buffer so that a failed
   decode cannot leave partial output in `result`. */
void rfc822_decode_punycode(const char *input, size_t len, string_t *result)
{
  static const char ace_prefix[] = "xn--";
  const size_t ace_prefix_len = sizeof(ace_prefix) - 1;
  string_t *decoded = t_str_new(64);
  const char *pos = input;
  const char *end = CONST_PTR_OFFSET(input, len);

  while (pos < end) {
    /* bounded search: a '.' beyond `len` must not be found */
    const char *delim = memchr(pos, '.', end - pos);
    if (delim == NULL)
      delim = end;
    size_t label_len = delim - pos;

    if (label_len >= ace_prefix_len &&
        memcmp(pos, ace_prefix, ace_prefix_len) == 0) {
      str_truncate(decoded, 0);
      if (punycode_decode(pos + ace_prefix_len,
              label_len - ace_prefix_len,
              decoded) < 0) {
        /* Consider it as data */
        str_append_data(result, pos, label_len);
      } else {
        str_append_str(result, decoded);
      }
    } else {
      /* No punycode prefix */
      str_append_data(result, pos, label_len);
    }

    /* the last label has no dot after it: stop without reading or
       copying the byte at `end` */
    if (delim == end)
      break;
    str_append_c(result, '.');
    pos = delim + 1;
  }
}

/* Parse the domain part of an address and append it to str.
   ctx->data must point at the '@'.

     domain          = dot-atom / domain-literal / obs-domain
     domain-literal  = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
     obs-domain      = atom *("." atom)

   Returns -1 if nothing (or only an unterminated comment) follows the
   '@'; otherwise the result of the domain-literal or dot-atom parser.

   With EXPERIMENTAL_MAIL_UTF8, a dot-atom domain for which the parser
   returned 0 has its punycode labels decoded in place. */
int rfc822_parse_domain(struct rfc822_parser_context *ctx, string_t *str)
{
  i_assert(ctx->data < ctx->end);
  i_assert(*ctx->data == '@');
  ctx->data++;

  if (rfc822_skip_lwsp(ctx) <= 0)
    return -1;

  if (*ctx->data == '[')
    return rfc822_parse_domain_literal(ctx, str);
  else {
#ifdef EXPERIMENTAL_MAIL_UTF8
    /* Remember where the domain begins in str. This has to be
       sampled before the parser appends, otherwise the slice
       below is always empty and nothing gets decoded. */
    size_t start_pos = str_len(str);
#endif
    int ret = rfc822_parse_dot_atom(ctx, str);
#ifdef EXPERIMENTAL_MAIL_UTF8
    /* NOTE(review): only ret == 0 (domain ends the input) is
       decoded; a domain followed by more input (ret == 1) is left
       as-is - confirm that this is intended. */
    if (ret == 0) {
      string_t *u = t_str_new(64);
      const char *data = t_strndup(str_data(str) + start_pos,
                 str_len(str) - start_pos);
      rfc822_decode_punycode(data, strlen(data), u);
      str_truncate(str, start_pos);
      str_append_str(str, u);
    }
#endif
    return ret;
  }
}

/* Parse a "type/subtype" Content-Type value and append it to str.

   Leading whitespace/comments are skipped, as is whitespace after the
   '/'. Both the type and the subtype must be at least one byte long, and
   the subtype must be followed either by the end of input or by ';'.

   Returns the result of parsing the subtype token on success (0 if the
   input ended there), -1 on failure. Failures detected after the main
   type has been parsed roll str back to the length it had on entry.

   NOTE(review): the failure path of the main type itself returns without
   rolling str back, so e.g. a value without '/' that ends the input
   leaves the type appended - confirm whether callers rely on this. */
int rfc822_parse_content_type(struct rfc822_parser_context *ctx, string_t *str)
{
  const size_t entry_len = str->used;
  size_t subtype_pos;
  int ret;

  if (rfc822_skip_lwsp(ctx) <= 0)
    return -1;

  /* get main type, require at least one byte */
  if (rfc822_parse_mime_token(ctx, str) <= 0)
    return -1;
  if (str->used == entry_len)
    return -1;

  /* skip over "/" */
  if (*ctx->data != '/')
    goto rollback;
  ctx->data++;
  if (rfc822_skip_lwsp(ctx) <= 0)
    goto rollback;
  str_append_c(str, '/');

  /* get subtype, require at least one byte,
     and check the next separator to avoid accepting
     invalid values. */
  subtype_pos = str->used;
  ret = rfc822_parse_mime_token(ctx, str);
  if (ret < 0)
    goto rollback;
  if (str->used == subtype_pos)
    goto rollback;
  if (ctx->data != ctx->end && *ctx->data != ';')
    goto rollback;
  return ret;

rollback:
  /* drop everything this call appended */
  str_truncate(str, entry_len);
  return -1;
}

/* Parse the next ";key=value" parameter of a Content-* header.

     .. := *(";" parameter)
     parameter := attribute "=" value
     attribute := token
     value := token / quoted-string

   `value` is emptied on entry and then receives the parameter value
   (unquoted if it was a quoted-string). *key_r is set to NULL on entry
   and to the parsed key when 1 is returned.

   Returns 0 if there is no more input, 1 if a parameter was parsed (its
   value may be empty if nothing usable followed the '='), and -1 if the
   input does not start with ';', the key or '=' is missing, or the value
   failed to parse. */
int rfc822_parse_content_param(struct rfc822_parser_context *ctx,
             const char **key_r, string_t *value)
{
  string_t *key;
  int ret;

  *key_r = NULL;
  str_truncate(value, 0);

  if (ctx->data >= ctx->end)
    return 0;
  if (*ctx->data != ';')
    return -1;
  ctx->data++;

  if (rfc822_skip_lwsp(ctx) <= 0)
    return -1;

  key = t_str_new(64);
  if (rfc822_parse_mime_token(ctx, key) <= 0)
    return -1;

  if (*ctx->data != '=')
    return -1;
  ctx->data++;

  ret = rfc822_skip_lwsp(ctx);
  if (ret > 0) {
    /* there is input after the '=': pick the value syntax by its
       first byte. With ret <= 0 the value is broken or missing and
       stays empty. */
    switch (*ctx->data) {
    case '"':
      ret = rfc822_parse_quoted_string(ctx, value);
      break;
    case '=':
      /* workaround for broken input:
         name==?utf-8?b?...?=
         copy everything up to ';' or whitespace verbatim */
      for (; ctx->data < ctx->end; ctx->data++) {
        if (*ctx->data == ';' ||
            *ctx->data == ' ' || *ctx->data == '\t' ||
            *ctx->data == '\r' || *ctx->data == '\n')
          break;
        str_append_c(value, *ctx->data);
      }
      break;
    default:
      ret = rfc822_parse_mime_token(ctx, value);
      break;
    }
  }

  *key_r = str_c(key);
  return ret < 0 ? -1 : 1;
}

Coverage Report

Created: 2025-08-03 06:27

Line	Count	Source (jump to first uncovered line)
1		/* Copyright (c) 2005-2018 Dovecot authors, see the included COPYING file */
2
3		#include "lib.h"
4		#include "str.h"
5		#include "punycode.h"
6		#include "strescape.h"
7		#include "rfc822-parser.h"
8
9		/*
10		atext = ALPHA / DIGIT / ; Any character except controls,
11		"!" / "#" / ; SP, and specials.
12		"$" / "%" / ; Used for atoms
13		"&" / "'" /
14		"*" / "+" /
15		"-" / "/" /
16		"=" / "?" /
17		"^" / "_" /
18		"`" / "{" /
19		"\|" / "}" /
20		"~"
21
22		MIME:
23
24		token := 1*<any (US-ASCII) CHAR except SPACE, CTLs,
25		or tspecials>
26		tspecials := "(" / ")" / "<" / ">" / "@" /
27		"," / ";" / ":" / "\" / <">
28		"/" / "[" / "]" / "?" / "="
29
30		So token is same as dot-atom, except stops also at '/', '?' and '='.
31		*/
32
33		/* atext chars are marked with 1, alpha and digits with 2,
34		atext-but-mime-tspecials with 4 */
35		unsigned char rfc822_atext_chars[256] = {
36		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0-15 */
37		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16-31 */
38		0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 4, /* 32-47 */
39		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 4, 0, 4, /* 48-63 */
40		0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 64-79 */
41		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 1, 1, /* 80-95 */
42		1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 96-111 */
43		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 0, /* 112-127 */
44
45		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
46		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
47		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
48		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
49		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
51		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
52		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
53		};
54
55		void rfc822_parser_init(struct rfc822_parser_context *ctx,
56		const unsigned char *data, size_t size,
57		string_t *last_comment)
58	0	{
59	0	i_zero(ctx);
60	0	ctx->data = data;
61	0	ctx->end = data + size;
62	0	ctx->last_comment = last_comment;
63	0	}
64
65		int rfc822_skip_comment(struct rfc822_parser_context *ctx)
66	0	{
67	0	const unsigned char *start;
68	0	size_t len;
69	0	int level = 1;
70
71	0	i_assert(*ctx->data == '(');
72
73	0	if (ctx->last_comment != NULL)
74	0	str_truncate(ctx->last_comment, 0);
75
76	0	start = ++ctx->data;
77	0	for (; ctx->data < ctx->end; ctx->data++) {
78	0	switch (*ctx->data) {
79	0	case '\0':
80	0	if (ctx->last_comment != NULL &&
81	0	ctx->nul_replacement_str != NULL) {
82	0	str_append_data(ctx->last_comment, start,
83	0	ctx->data - start);
84	0	str_append(ctx->last_comment,
85	0	ctx->nul_replacement_str);
86	0	start = ctx->data + 1;
87	0	}
88	0	break;
89	0	case '(':
90	0	level++;
91	0	break;
92	0	case ')':
93	0	if (--level == 0) {
94	0	if (ctx->last_comment != NULL) {
95	0	str_append_data(ctx->last_comment, start,
96	0	ctx->data - start);
97	0	}
98	0	ctx->data++;
99	0	return ctx->data < ctx->end ? 1 : 0;
100	0	}
101	0	break;
102	0	case '\n':
103		/* folding whitespace, remove the (CR)LF */
104	0	if (ctx->last_comment == NULL)
105	0	break;
106	0	len = ctx->data - start;
107	0	if (len > 0 && start[len-1] == '\r')
108	0	len--;
109	0	str_append_data(ctx->last_comment, start, len);
110	0	start = ctx->data + 1;
111	0	break;
112	0	case '\\':
113	0	ctx->data++;
114	0	if (ctx->data >= ctx->end)
115	0	return -1;
116
117	0	if (ctx->data == '\r' \|\| ctx->data == '\n' \|\|
118	0	*ctx->data == '\0') {
119		/* quoted-pair doesn't allow CR/LF/NUL.
120		They are part of the obs-qp though, so don't
121		return them as error. */
122	0	ctx->data--;
123	0	break;
124	0	}
125	0	if (ctx->last_comment != NULL) {
126	0	str_append_data(ctx->last_comment, start,
127	0	ctx->data - start - 1);
128	0	}
129	0	start = ctx->data;
130	0	break;
131	0	}
132	0	}
133
134		/* missing ')' */
135	0	return -1;
136	0	}
137
138		int rfc822_skip_lwsp(struct rfc822_parser_context *ctx)
139	0	{
140	0	for (; ctx->data < ctx->end;) {
141	0	if (ctx->data == ' ' \|\| ctx->data == '\t' \|\|
142	0	ctx->data == '\r' \|\| ctx->data == '\n') {
143	0	ctx->data++;
144	0	continue;
145	0	}
146
147	0	if (*ctx->data != '(')
148	0	break;
149
150	0	if (rfc822_skip_comment(ctx) < 0)
151	0	return -1;
152	0	}
153	0	return ctx->data < ctx->end ? 1 : 0;
154	0	}
155
156		int rfc822_parse_atom(struct rfc822_parser_context ctx, string_t str)
157	0	{
158	0	const unsigned char *start;
159
160		/*
161		atom = [CFWS] 1*atext [CFWS]
162		atext =
163		; Any character except controls, SP, and specials.
164		*/
165	0	if (ctx->data >= ctx->end \|\| !IS_ATEXT(*ctx->data))
166	0	return -1;
167
168	0	for (start = ctx->data++; ctx->data < ctx->end; ctx->data++) {
169	0	if (IS_ATEXT(*ctx->data))
170	0	continue;
171
172	0	str_append_data(str, start, ctx->data - start);
173	0	return rfc822_skip_lwsp(ctx);
174	0	}
175
176	0	str_append_data(str, start, ctx->data - start);
177	0	return 0;
178	0	}
179
180		int rfc822_parse_dot_atom(struct rfc822_parser_context ctx, string_t str)
181	0	{
182	0	const unsigned char *start;
183	0	int ret;
184
185		/*
186		dot-atom = [CFWS] dot-atom-text [CFWS]
187		dot-atom-text = 1atext ("." 1*atext)
188
189		atext =
190		; Any character except controls, SP, and specials.
191
192		For RFC-822 compatibility allow LWSP around '.'
193		*/
194	0	if (ctx->data >= ctx->end \|\| !IS_ATEXT(*ctx->data))
195	0	return -1;
196
197	0	for (start = ctx->data++; ctx->data < ctx->end; ) {
198	0	if (IS_ATEXT(*ctx->data)) {
199	0	ctx->data++;
200	0	continue;
201	0	}
202
203	0	if (start == ctx->data)
204	0	return -1;
205	0	str_append_data(str, start, ctx->data - start);
206
207	0	if ((ret = rfc822_skip_lwsp(ctx)) <= 0)
208	0	return ret;
209
210	0	if (*ctx->data != '.')
211	0	return 1;
212
213	0	ctx->data++;
214	0	str_append_c(str, '.');
215
216	0	if (rfc822_skip_lwsp(ctx) <= 0)
217	0	return -1;
218	0	start = ctx->data;
219	0	}
220
221	0	i_assert(start != ctx->data);
222	0	str_append_data(str, start, ctx->data - start);
223	0	return 0;
224	0	}
225
226		int rfc822_parse_mime_token(struct rfc822_parser_context ctx, string_t str)
227	0	{
228	0	const unsigned char *start;
229
230	0	for (start = ctx->data; ctx->data < ctx->end; ctx->data++) {
231	0	if (IS_ATEXT_NON_TSPECIAL(ctx->data) \|\| ctx->data == '.')
232	0	continue;
233
234	0	str_append_data(str, start, ctx->data - start);
235	0	return rfc822_skip_lwsp(ctx);
236	0	}
237
238	0	str_append_data(str, start, ctx->data - start);
239	0	return 0;
240	0	}
241
242		int rfc822_parse_quoted_string(struct rfc822_parser_context ctx, string_t str)
243	0	{
244	0	const unsigned char *start;
245	0	size_t len;
246
247	0	i_assert(ctx->data < ctx->end);
248	0	i_assert(*ctx->data == '"');
249	0	ctx->data++;
250
251	0	for (start = ctx->data; ctx->data < ctx->end; ctx->data++) {
252	0	switch (*ctx->data) {
253	0	case '\0':
254	0	if (ctx->nul_replacement_str != NULL) {
255	0	str_append_data(str, start, ctx->data - start);
256	0	str_append(str, ctx->nul_replacement_str);
257	0	start = ctx->data + 1;
258	0	}
259	0	break;
260	0	case '"':
261	0	str_append_data(str, start, ctx->data - start);
262	0	ctx->data++;
263	0	return rfc822_skip_lwsp(ctx);
264	0	case '\n':
265		/* folding whitespace, remove the (CR)LF */
266	0	len = ctx->data - start;
267	0	if (len > 0 && start[len-1] == '\r')
268	0	len--;
269	0	str_append_data(str, start, len);
270	0	start = ctx->data + 1;
271	0	break;
272	0	case '\\':
273	0	ctx->data++;
274	0	if (ctx->data >= ctx->end)
275	0	return -1;
276
277	0	if (ctx->data == '\r' \|\| ctx->data == '\n' \|\|
278	0	*ctx->data == '\0') {
279		/* quoted-pair doesn't allow CR/LF/NUL.
280		They are part of the obs-qp though, so don't
281		return them as error. */
282	0	ctx->data--;
283	0	break;
284	0	}
285	0	str_append_data(str, start, ctx->data - start - 1);
286	0	start = ctx->data;
287	0	break;
288	0	}
289	0	}
290
291		/* missing '"' */
292	0	return -1;
293	0	}
294
295		static int
296		rfc822_parse_atom_or_dot(struct rfc822_parser_context ctx, string_t str)
297	0	{
298	0	const unsigned char *start;
299
300		/*
301		atom = [CFWS] 1*atext [CFWS]
302		atext =
303		; Any character except controls, SP, and specials.
304
305		The difference between this function and rfc822_parse_dot_atom()
306		is that this doesn't just silently skip over all the whitespace.
307		*/
308	0	for (start = ctx->data; ctx->data < ctx->end; ctx->data++) {
309	0	if (IS_ATEXT(ctx->data) \|\| ctx->data == '.')
310	0	continue;
311
312	0	str_append_data(str, start, ctx->data - start);
313	0	return rfc822_skip_lwsp(ctx);
314	0	}
315
316	0	str_append_data(str, start, ctx->data - start);
317	0	return 0;
318	0	}
319
320		int rfc822_parse_phrase(struct rfc822_parser_context ctx, string_t str)
321	0	{
322	0	int ret;
323
324		/*
325		phrase = 1*word / obs-phrase
326		word = atom / quoted-string
327		obs-phrase = word *(word / "." / CFWS)
328		*/
329
330	0	if (ctx->data >= ctx->end)
331	0	return 0;
332	0	if (*ctx->data == '.')
333	0	return -1;
334
335	0	for (;;) {
336	0	if (*ctx->data == '"')
337	0	ret = rfc822_parse_quoted_string(ctx, str);
338	0	else
339	0	ret = rfc822_parse_atom_or_dot(ctx, str);
340
341	0	if (ret <= 0)
342	0	return ret;
343
344	0	if (!IS_ATEXT(ctx->data) && ctx->data != '"'
345	0	&& *ctx->data != '.')
346	0	break;
347	0	str_append_c(str, ' ');
348	0	}
349	0	return rfc822_skip_lwsp(ctx);
350	0	}
351
352		static int
353		rfc822_parse_domain_literal(struct rfc822_parser_context ctx, string_t str)
354	0	{
355	0	const unsigned char *start;
356	0	size_t len;
357
358		/*
359		domain-literal = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
360		dcontent = dtext / quoted-pair
361		dtext = NO-WS-CTL / ; Non white space controls
362		%d33-90 / ; The rest of the US-ASCII
363		%d94-126 ; characters not including "[",
364		; "]", or "\"
365		*/
366	0	i_assert(ctx->data < ctx->end);
367	0	i_assert(*ctx->data == '[');
368
369	0	for (start = ctx->data++; ctx->data < ctx->end; ctx->data++) {
370	0	switch (*ctx->data) {
371	0	case '\0':
372	0	if (ctx->nul_replacement_str != NULL) {
373	0	str_append_data(str, start, ctx->data - start);
374	0	str_append(str, ctx->nul_replacement_str);
375	0	start = ctx->data + 1;
376	0	}
377	0	break;
378	0	case '[':
379		/* not allowed */
380	0	return -1;
381	0	case ']':
382	0	str_append_data(str, start, ctx->data - start + 1);
383	0	ctx->data++;
384	0	return rfc822_skip_lwsp(ctx);
385	0	case '\n':
386		/* folding whitespace, remove the (CR)LF */
387	0	len = ctx->data - start;
388	0	if (len > 0 && start[len-1] == '\r')
389	0	len--;
390	0	str_append_data(str, start, len);
391	0	start = ctx->data + 1;
392	0	break;
393	0	case '\\':
394		/* note: the '\' is preserved in the output */
395	0	ctx->data++;
396	0	if (ctx->data >= ctx->end)
397	0	return -1;
398
399	0	if (ctx->data == '\r' \|\| ctx->data == '\n' \|\|
400	0	*ctx->data == '\0') {
401		/* quoted-pair doesn't allow CR/LF/NUL.
402		They are part of the obs-qp though, so don't
403		return them as error. */
404	0	str_append_data(str, start, ctx->data - start);
405	0	start = ctx->data;
406	0	ctx->data--;
407	0	break;
408	0	}
409	0	}
410	0	}
411
412		/* missing ']' */
413	0	return -1;
414	0	}
415
416		void rfc822_decode_punycode(const char input, size_t len, string_t result)
417	0	{
418	0	string_t *decoded = t_str_new(64);
419	0	const char *pos = input;
420	0	const char *end = CONST_PTR_OFFSET(input, len);
421
422	0	while (pos < end) {
423	0	const char *value;
424	0	const char *delim = strchr(pos, '.');
425	0	if (delim == NULL)
426	0	delim = end;
427	0	if (str_begins(pos, "xn--", &value)) {
428	0	str_truncate(decoded, 0);
429	0	if (punycode_decode(value, delim - value, result) < 0)
430		/* Consider it as data */
431	0	str_append_data(result, pos, delim - pos + 1);
432	0	else if (*delim == '.')
433	0	str_append_c(result, *delim);
434	0	} else {
435		/* No punycode prefix */
436	0	str_append_data(result, pos, delim - pos + 1);
437	0	}
438	0	pos = delim + 1;
439	0	}
440	0	if (pos < end)
441	0	str_append_data(result, pos, end - pos);
442	0	}
443
444		int rfc822_parse_domain(struct rfc822_parser_context ctx, string_t str)
445	0	{
446		/*
447		domain = dot-atom / domain-literal / obs-domain
448		domain-literal = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
449		obs-domain = atom *("." atom)
450		*/
451	0	i_assert(ctx->data < ctx->end);
452	0	i_assert(*ctx->data == '@');
453	0	ctx->data++;
454
455	0	if (rfc822_skip_lwsp(ctx) <= 0)
456	0	return -1;
457
458	0	if (*ctx->data == '[')
459	0	return rfc822_parse_domain_literal(ctx, str);
460	0	else {
461	0	int ret = rfc822_parse_dot_atom(ctx, str);
462		#ifdef EXPERIMENTAL_MAIL_UTF8
463		if (ret == 0) {
464		size_t start_pos = str_len(str);
465		string_t *u = t_str_new(64);
466		const char *data = t_strndup(str_data(str) + start_pos,
467		str_len(str) - start_pos);
468		rfc822_decode_punycode(data, strlen(data), u);
469		str_truncate(str, start_pos);
470		str_append_str(str, u);
471		}
472		#endif
473	0	return ret;
474	0	}
475	0	}
476
477		int rfc822_parse_content_type(struct rfc822_parser_context ctx, string_t str)
478	0	{
479	0	size_t str_pos_0 = str->used;
480	0	if (rfc822_skip_lwsp(ctx) <= 0)
481	0	return -1;
482
483		/* get main type, require at least one byte */
484	0	if (rfc822_parse_mime_token(ctx, str) <= 0 \|\|
485	0	str->used == str_pos_0)
486	0	return -1;
487
488		/* skip over "/" */
489	0	if (*ctx->data != '/') {
490	0	str_truncate(str, str_pos_0);
491	0	return -1;
492	0	}
493	0	ctx->data++;
494	0	if (rfc822_skip_lwsp(ctx) <= 0) {
495	0	str_truncate(str, str_pos_0);
496	0	return -1;
497	0	}
498	0	str_append_c(str, '/');
499
500	0	size_t str_pos = str->used;
501		/* get subtype, require at least one byte,
502		and check the next separator to avoid accepting
503		invalid values. */
504	0	int ret;
505	0	if ((ret = rfc822_parse_mime_token(ctx, str)) < 0 \|\|
506	0	str->used == str_pos \|\|
507	0	(ctx->data != ctx->end && *ctx->data != ';')) {
508	0	str_truncate(str, str_pos_0);
509	0	return -1;
510	0	}
511	0	return ret;
512	0	}
513
514		int rfc822_parse_content_param(struct rfc822_parser_context *ctx,
515		const char *key_r, string_t value)
516	0	{
517	0	string_t *key;
518	0	int ret;
519
520		/* .. := *(";" parameter)
521		parameter := attribute "=" value
522		attribute := token
523		value := token / quoted-string
524		*/
525	0	*key_r = NULL;
526	0	str_truncate(value, 0);
527
528	0	if (ctx->data >= ctx->end)
529	0	return 0;
530	0	if (*ctx->data != ';')
531	0	return -1;
532	0	ctx->data++;
533
534	0	if (rfc822_skip_lwsp(ctx) <= 0)
535	0	return -1;
536
537	0	key = t_str_new(64);
538	0	if (rfc822_parse_mime_token(ctx, key) <= 0)
539	0	return -1;
540
541	0	if (*ctx->data != '=')
542	0	return -1;
543	0	ctx->data++;
544
545	0	if ((ret = rfc822_skip_lwsp(ctx)) <= 0) {
546		/* broken / no value */
547	0	} else if (*ctx->data == '"') {
548	0	ret = rfc822_parse_quoted_string(ctx, value);
549	0	} else if (ctx->data < ctx->end && *ctx->data == '=') {
550		/* workaround for broken input:
551		name==?utf-8?b?...?= */
552	0	while (ctx->data < ctx->end && *ctx->data != ';' &&
553	0	ctx->data != ' ' && ctx->data != '\t' &&
554	0	ctx->data != '\r' && ctx->data != '\n') {
555	0	str_append_c(value, *ctx->data);
556	0	ctx->data++;
557	0	}
558	0	} else {
559	0	ret = rfc822_parse_mime_token(ctx, value);
560	0	}
561
562	0	*key_r = str_c(key);
563	0	return ret < 0 ? -1 : 1;
564	0	}