/src/dovecot/src/lib-charset/charset-utf8.c

Source
/* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "buffer.h"
#include "str.h"
#include "charset-utf8-private.h"

#include <ctype.h>

/* Function table that every charset_to_utf8_*() wrapper in this file
   dispatches through. It is initialized to point at charset_iconv, which is
   defined outside this file. */
const struct charset_utf8_vfuncs *charset_utf8_vfuncs = &charset_iconv;

/* Returns whether the given charset name is one of "us-ascii", "ascii",
   "UTF-8" or "UTF8". The comparison ignores case. */
bool charset_is_utf8(const char *charset)
{
  static const char *const utf8_names[] = {
    "us-ascii",
    "ascii",
    "UTF-8",
    "UTF8"
  };
  const unsigned int names_count =
    sizeof(utf8_names) / sizeof(utf8_names[0]);
  unsigned int i;

  /* stop at the first name that matches */
  for (i = 0; i < names_count; i++) {
    if (strcasecmp(charset, utf8_names[i]) == 0)
      break;
  }
  /* i only reaches names_count when nothing matched */
  return i < names_count;
}

/* Convert a whole NUL-terminated string in one call.

   Begins a translation for charset (with the optional normalizer), feeds
   strlen(input) bytes of input through charset_to_utf8() into output, and
   ends the translation again.

   Returns -1 if charset_to_utf8_begin() fails; *result_r is not written in
   that case. Otherwise returns 0 and stores the charset_to_utf8() result in
   *result_r. */
int charset_to_utf8_str(const char *charset, normalizer_func_t *normalizer,
      const char *input, string_t *output,
      enum charset_result *result_r)
{
  struct charset_translation *trans;
  size_t input_len;

  input_len = strlen(input);
  if (charset_to_utf8_begin(charset, normalizer, &trans) < 0) {
    /* nothing was started, so there is nothing to end */
    return -1;
  }

  *result_r = charset_to_utf8(trans, (const unsigned char *)input,
            &input_len, output);
  charset_to_utf8_end(&trans);
  return 0;
}

/* Begin a translation whose source charset is "UTF-8", using the given
   normalizer. A failure from charset_to_utf8_begin() is treated as
   impossible here (i_unreached()), so this always returns a translation. */
struct charset_translation *
charset_utf8_to_utf8_begin(normalizer_func_t *normalizer)
{
  struct charset_translation *utf8_trans;
  int ret;

  ret = charset_to_utf8_begin("UTF-8", normalizer, &utf8_trans);
  if (ret < 0)
    i_unreached();
  return utf8_trans;
}

/* Pass UTF-8 input through to dest, optionally via a normalizer.

   uni_utf8_partial_strlen_n() reports how many leading bytes of src it
   counted. If that is less than *src_size, the uncounted tail is left out:
   *src_size is lowered to the counted length and the result becomes
   CHARSET_RET_INCOMPLETE_INPUT instead of CHARSET_RET_OK. The tail is
   asserted to be at most CHARSET_MAX_PENDING_BUF_SIZE bytes.

   With a normalizer, it is called on the counted bytes and a negative return
   gives CHARSET_RET_INVALID_INPUT. Without one, the bytes are appended to
   dest unchanged when uni_utf8_get_valid_data() returns TRUE; when it returns
   FALSE the result is CHARSET_RET_INVALID_INPUT.

   NOTE(review): uni_utf8_get_valid_data() is also given dest, so presumably
   it writes a sanitized copy there itself on failure - confirm against its
   documentation.

   *src_size keeps its lowered value even when CHARSET_RET_INVALID_INPUT is
   returned. */
enum charset_result
charset_utf8_to_utf8(normalizer_func_t *normalizer,
         const unsigned char *src, size_t *src_size, buffer_t *dest)
{
  enum charset_result ret;
  size_t counted_len;

  uni_utf8_partial_strlen_n(src, *src_size, &counted_len);
  if (counted_len >= *src_size) {
    ret = CHARSET_RET_OK;
  } else {
    /* leave the uncounted tail for the caller to feed again later */
    i_assert(*src_size - counted_len <= CHARSET_MAX_PENDING_BUF_SIZE);
    *src_size = counted_len;
    ret = CHARSET_RET_INCOMPLETE_INPUT;
  }

  if (normalizer != NULL) {
    /* the normalizer is responsible for writing to dest */
    if (normalizer(src, *src_size, dest) < 0)
      return CHARSET_RET_INVALID_INPUT;
    return ret;
  }

  if (!uni_utf8_get_valid_data(src, *src_size, dest))
    return CHARSET_RET_INVALID_INPUT;

  /* input was reported valid: copy it as is */
  buffer_append(dest, src, *src_size);
  return ret;
}

/* Begin a translation from charset to UTF-8 by calling to_utf8_begin() in
   the charset_utf8_vfuncs table. All arguments are forwarded unchanged and
   the callee's return value is returned as is. */
int charset_to_utf8_begin(const char *charset, normalizer_func_t *normalizer,
        struct charset_translation **t_r)
{
  int ret;

  ret = charset_utf8_vfuncs->to_utf8_begin(charset, normalizer, t_r);
  return ret;
}

/* End a translation. The caller's pointer is set to NULL first, then the
   translation it pointed to is handed to to_utf8_end() in the
   charset_utf8_vfuncs table. */
void charset_to_utf8_end(struct charset_translation **_t)
{
  struct charset_translation *trans;

  /* take the translation and clear the caller's reference to it */
  trans = *_t;
  *_t = NULL;

  charset_utf8_vfuncs->to_utf8_end(trans);
}

/* Reset translation t by calling to_utf8_reset() in the charset_utf8_vfuncs
   table. What state is reset is decided by that implementation. */
void charset_to_utf8_reset(struct charset_translation *t)
{
  charset_utf8_vfuncs->to_utf8_reset(t);
}

/* Translate src into dest using translation t by calling to_utf8() in the
   charset_utf8_vfuncs table. src_size is passed by pointer, so the
   implementation may update it. All arguments are forwarded unchanged and
   the callee's result is returned as is. */
enum charset_result
charset_to_utf8(struct charset_translation *t,
    const unsigned char *src, size_t *src_size, buffer_t *dest)
{
  enum charset_result result;

  result = charset_utf8_vfuncs->to_utf8(t, src, src_size, dest);
  return result;
}

Line	Count	Source
1		/* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */
2
3		#include "lib.h"
4		#include "buffer.h"
5		#include "str.h"
6		#include "charset-utf8-private.h"
7
8		#include <ctype.h>
9
10		const struct charset_utf8_vfuncs *charset_utf8_vfuncs = &charset_iconv;
11
12		bool charset_is_utf8(const char *charset)
13	427k	{
14	427k	return strcasecmp(charset, "us-ascii") == 0 ||
15	426k	strcasecmp(charset, "ascii") == 0 ||
16	425k	strcasecmp(charset, "UTF-8") == 0 ||
17	60.8k	strcasecmp(charset, "UTF8") == 0;
18	427k	}
19
20		int charset_to_utf8_str(const char *charset, normalizer_func_t *normalizer,
21		const char *input, string_t *output,
22		enum charset_result *result_r)
23	22.9k	{
24	22.9k	struct charset_translation *t;
25	22.9k	size_t len = strlen(input);
26
27	22.9k	if (charset_to_utf8_begin(charset, normalizer, &t) < 0)
28	10.9k	return -1;
29
30	11.9k	*result_r = charset_to_utf8(t, (const unsigned char *)input,
31	11.9k	&len, output);
32	11.9k	charset_to_utf8_end(&t);
33	11.9k	return 0;
34	22.9k	}
35
36		struct charset_translation *
37		charset_utf8_to_utf8_begin(normalizer_func_t *normalizer)
38	684	{
39	684	struct charset_translation *trans;
40
41	684	if (charset_to_utf8_begin("UTF-8", normalizer, &trans) < 0)
42	0	i_unreached();
43	684	return trans;
44	684	}
45
46		enum charset_result
47		charset_utf8_to_utf8(normalizer_func_t *normalizer,
48		const unsigned char *src, size_t *src_size, buffer_t *dest)
49	7.35M	{
50	7.35M	enum charset_result res = CHARSET_RET_OK;
51	7.35M	size_t pos;
52
53	7.35M	uni_utf8_partial_strlen_n(src, *src_size, &pos);
54	7.35M	if (pos < *src_size) {
55	4.88k	i_assert(*src_size - pos <= CHARSET_MAX_PENDING_BUF_SIZE);
56	4.88k	*src_size = pos;
57	4.88k	res = CHARSET_RET_INCOMPLETE_INPUT;
58	4.88k	}
59
60	7.35M	if (normalizer != NULL) {
61	1.22M	if (normalizer(src, *src_size, dest) < 0)
62	8.25k	return CHARSET_RET_INVALID_INPUT;
63	6.12M	} else if (!uni_utf8_get_valid_data(src, *src_size, dest)) {
64	5.91k	return CHARSET_RET_INVALID_INPUT;
65	6.12M	} else {
66	6.12M	buffer_append(dest, src, *src_size);
67	6.12M	}
68	7.33M	return res;
69	7.35M	}
70
71		int charset_to_utf8_begin(const char *charset, normalizer_func_t *normalizer,
72		struct charset_translation **t_r)
73	407k	{
74	407k	return charset_utf8_vfuncs->to_utf8_begin(charset, normalizer, t_r);
75	407k	}
76
77		void charset_to_utf8_end(struct charset_translation **_t)
78	393k	{
79	393k	struct charset_translation *t = *_t;
80
81	393k	*_t = NULL;
82	393k	charset_utf8_vfuncs->to_utf8_end(t);
83	393k	}
84
85		void charset_to_utf8_reset(struct charset_translation *t)
86	784	{
87	784	charset_utf8_vfuncs->to_utf8_reset(t);
88	784	}
89
90		enum charset_result
91		charset_to_utf8(struct charset_translation *t,
92		const unsigned char *src, size_t *src_size, buffer_t *dest)
93	56.2k	{
94	56.2k	return charset_utf8_vfuncs->to_utf8(t, src, src_size, dest);
95	56.2k	}

Coverage Report

Created: 2026-04-12 07:00