/src/dovecot/src/lib-charset/charset-utf8.c

Source
/* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "buffer.h"
#include "str.h"
#include "charset-utf8-private.h"

#include <ctype.h>

/* Table of conversion callbacks used by the public charset_to_utf8_*()
   wrappers in this file (to_utf8_begin, to_utf8_end, to_utf8_reset and
   to_utf8 are all dispatched through it). It points at the iconv
   implementation by default; the pointer itself is not const, so it can be
   redirected to another implementation. */
const struct charset_utf8_vfuncs *charset_utf8_vfuncs = &charset_iconv;

/* Returns TRUE if the given charset name is one whose data can be handled
   directly as UTF-8: US-ASCII ("us-ascii", "ascii") or UTF-8 itself
   ("UTF-8", "UTF8"). The comparison is case-insensitive. */
bool charset_is_utf8(const char *charset)
{
  static const char *const utf8_names[] = {
    "us-ascii",
    "ascii",
    "UTF-8",
    "UTF8"
  };
  const unsigned int names_count =
    sizeof(utf8_names) / sizeof(utf8_names[0]);
  unsigned int i;

  /* stop at the first matching name; i == names_count means no match */
  for (i = 0; i < names_count; i++) {
    if (strcasecmp(charset, utf8_names[i]) == 0)
      break;
  }
  return i < names_count;
}

/* Convert the NUL-terminated string input from the given charset to UTF-8
   in one call, appending the result to output.

   Returns -1 if charset_to_utf8_begin() fails for this charset; *result_r
   is left untouched in that case. Otherwise returns 0 and stores the
   result of charset_to_utf8() in *result_r, so the caller must check
   *result_r to find out how the conversion itself went. */
int charset_to_utf8_str(const char *charset, normalizer_func_t *normalizer,
      const char *input, string_t *output,
      enum charset_result *result_r)
{
  struct charset_translation *trans;
  size_t input_len;

  if (charset_to_utf8_begin(charset, normalizer, &trans) < 0)
    return -1;

  /* charset_to_utf8() may update input_len; the updated value isn't
     reported back to the caller. */
  input_len = strlen(input);
  *result_r = charset_to_utf8(trans, (const unsigned char *)input,
            &input_len, output);

  charset_to_utf8_end(&trans);
  return 0;
}

/* Begin a translation whose source charset is "UTF-8", using the given
   normalizer. Failure of charset_to_utf8_begin() for "UTF-8" is treated
   as impossible (i_unreached()), so the returned translation is always
   the one produced by that call. */
struct charset_translation *
charset_utf8_to_utf8_begin(normalizer_func_t *normalizer)
{
  struct charset_translation *t;
  int ret;

  ret = charset_to_utf8_begin("UTF-8", normalizer, &t);
  if (ret < 0)
    i_unreached();
  return t;
}

/* Pass UTF-8 input through to dest, optionally via a normalizer.

   *src_size is the number of input bytes on entry. If
   uni_utf8_partial_strlen_n() reports a position before the end of the
   input, *src_size is reduced to that position and only that prefix is
   processed (presumably the tail is an unfinished multi-byte sequence -
   confirm against unichar.h).

   Returns:
    - CHARSET_RET_INVALID_INPUT if the normalizer returns < 0, or if there
      is no normalizer and uni_utf8_get_valid_data() returns FALSE (in
      which case src itself isn't appended here; presumably the helper has
      already written a sanitized copy to dest - confirm against
      unichar.h).
    - CHARSET_RET_INCOMPLETE_INPUT if the input was shortened as described
      above and processing otherwise succeeded.
    - CHARSET_RET_OK otherwise. */
enum charset_result
charset_utf8_to_utf8(normalizer_func_t *normalizer,
         const unsigned char *src, size_t *src_size, buffer_t *dest)
{
  size_t usable_len;
  bool truncated;

  uni_utf8_partial_strlen_n(src, *src_size, &usable_len);
  truncated = usable_len < *src_size;
  if (truncated) {
    /* the unprocessed tail must be small enough for the caller to keep
       as pending data */
    i_assert(*src_size - usable_len <= CHARSET_MAX_PENDING_BUF_SIZE);
    *src_size = usable_len;
  }

  if (normalizer == NULL) {
    if (!uni_utf8_get_valid_data(src, *src_size, dest))
      return CHARSET_RET_INVALID_INPUT;
    /* input was fully valid: copy it as is */
    buffer_append(dest, src, *src_size);
  } else {
    if (normalizer(src, *src_size, dest) < 0)
      return CHARSET_RET_INVALID_INPUT;
  }

  return truncated ? CHARSET_RET_INCOMPLETE_INPUT : CHARSET_RET_OK;
}

/* Begin a translation from the given charset to UTF-8 by calling the
   active implementation's to_utf8_begin() callback. The callback's return
   value is passed through unchanged; callers in this file treat a negative
   value as failure. The translation is returned via t_r. */
int charset_to_utf8_begin(const char *charset, normalizer_func_t *normalizer,
        struct charset_translation **t_r)
{
  const struct charset_utf8_vfuncs *vfuncs = charset_utf8_vfuncs;
  int ret;

  ret = vfuncs->to_utf8_begin(charset, normalizer, t_r);
  return ret;
}

/* Finish a translation: the caller's pointer is set to NULL first, and the
   translation is then handed to the active implementation's to_utf8_end()
   callback. */
void charset_to_utf8_end(struct charset_translation **_t)
{
  struct charset_translation *trans;

  trans = *_t;
  /* clear the caller's reference before the object is ended */
  *_t = NULL;

  charset_utf8_vfuncs->to_utf8_end(trans);
}

void charset_to_utf8_reset(struct charset_translation *t)
{
  charset_utf8_vfuncs->to_utf8_reset(t);
}

/* Translate src into UTF-8 and write it to dest by calling the active
   implementation's to_utf8() callback. src_size is passed by pointer so
   the callback can update it (charset_utf8_to_utf8() in this file shrinks
   it to the number of bytes it actually processed). The callback's result
   is returned unchanged. */
enum charset_result
charset_to_utf8(struct charset_translation *t,
    const unsigned char *src, size_t *src_size, buffer_t *dest)
{
  const struct charset_utf8_vfuncs *vfuncs = charset_utf8_vfuncs;
  enum charset_result result;

  result = vfuncs->to_utf8(t, src, src_size, dest);
  return result;
}

Line	Count	Source
1		/* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */
2
3		#include "lib.h"
4		#include "buffer.h"
5		#include "str.h"
6		#include "charset-utf8-private.h"
7
8		#include <ctype.h>
9
10		const struct charset_utf8_vfuncs *charset_utf8_vfuncs = &charset_iconv;
11
12		bool charset_is_utf8(const char *charset)
13	13.2k	{
14	13.2k	return strcasecmp(charset, "us-ascii") == 0 ||
15	13.0k	strcasecmp(charset, "ascii") == 0 ||
16	12.7k	strcasecmp(charset, "UTF-8") == 0 ||
17	11.8k	strcasecmp(charset, "UTF8") == 0;
18	13.2k	}
19
20		int charset_to_utf8_str(const char *charset, normalizer_func_t *normalizer,
21		const char *input, string_t *output,
22		enum charset_result *result_r)
23	13.2k	{
24	13.2k	struct charset_translation *t;
25	13.2k	size_t len = strlen(input);
26
27	13.2k	if (charset_to_utf8_begin(charset, normalizer, &t) < 0)
28	6.00k	return -1;
29
30	7.27k	*result_r = charset_to_utf8(t, (const unsigned char *)input,
31	7.27k	&len, output);
32	7.27k	charset_to_utf8_end(&t);
33	7.27k	return 0;
34	13.2k	}
35
36		struct charset_translation *
37		charset_utf8_to_utf8_begin(normalizer_func_t *normalizer)
38	0	{
39	0	struct charset_translation *trans;
40
41	0	if (charset_to_utf8_begin("UTF-8", normalizer, &trans) < 0)
42	0	i_unreached();
43	0	return trans;
44	0	}
45
46		enum charset_result
47		charset_utf8_to_utf8(normalizer_func_t *normalizer,
48		const unsigned char *src, size_t *src_size, buffer_t *dest)
49	278k	{
50	278k	enum charset_result res = CHARSET_RET_OK;
51	278k	size_t pos;
52
53	278k	uni_utf8_partial_strlen_n(src, *src_size, &pos);
54	278k	if (pos < *src_size) {
55	243	i_assert(*src_size - pos <= CHARSET_MAX_PENDING_BUF_SIZE);
56	243	*src_size = pos;
57	243	res = CHARSET_RET_INCOMPLETE_INPUT;
58	243	}
59
60	278k	if (normalizer != NULL) {
61	0	if (normalizer(src, *src_size, dest) < 0)
62	0	return CHARSET_RET_INVALID_INPUT;
63	278k	} else if (!uni_utf8_get_valid_data(src, *src_size, dest)) {
64	2.84k	return CHARSET_RET_INVALID_INPUT;
65	275k	} else {
66	275k	buffer_append(dest, src, *src_size);
67	275k	}
68	275k	return res;
69	278k	}
70
71		int charset_to_utf8_begin(const char *charset, normalizer_func_t *normalizer,
72		struct charset_translation **t_r)
73	13.2k	{
74	13.2k	return charset_utf8_vfuncs->to_utf8_begin(charset, normalizer, t_r);
75	13.2k	}
76
77		void charset_to_utf8_end(struct charset_translation **_t)
78	7.27k	{
79	7.27k	struct charset_translation *t = *_t;
80
81	7.27k	*_t = NULL;
82	7.27k	charset_utf8_vfuncs->to_utf8_end(t);
83	7.27k	}
84
85		void charset_to_utf8_reset(struct charset_translation *t)
86	0	{
87	0	charset_utf8_vfuncs->to_utf8_reset(t);
88	0	}
89
90		enum charset_result
91		charset_to_utf8(struct charset_translation *t,
92		const unsigned char *src, size_t *src_size, buffer_t *dest)
93	7.27k	{
94	7.27k	return charset_utf8_vfuncs->to_utf8(t, src, src_size, dest);
95	7.27k	}

Coverage Report

Created: 2026-05-16 06:51