Coverage Report

Created: 2026-03-10 07:12

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/dovecot/src/lib/str-sanitize.c
Line
Count
Source
1
/* Copyright (c) 2004-2018 Dovecot authors, see the included COPYING file */
2
3
#include "lib.h"
4
#include "unichar.h"
5
#include "str.h"
6
#include "str-sanitize.h"
7
#include <ctype.h>
8
9
static size_t str_sanitize_skip_start(const char *src, size_t max_bytes)
10
12.3k
{
11
12.3k
  unichar_t chr;
12
12.3k
  size_t i;
13
14
231k
  for (i = 0; i < max_bytes && src[i] != '\0'; ) {
15
218k
    int len = uni_utf8_get_char_n(src+i, max_bytes-i, &chr);
16
218k
    if (len <= 0)
17
160
      break;
18
218k
    if (i_iscntrl(src[i]))
19
34
      break;
20
218k
    i += len;
21
218k
  }
22
12.3k
  i_assert(i <= max_bytes);
23
12.3k
  return i;
24
12.3k
}
25
26
27
static size_t
28
str_sanitize_skip_start_utf8(const char *src, uintmax_t max_chars)
29
0
{
30
0
  unichar_t chr;
31
0
  uintmax_t c;
32
0
  size_t i;
33
34
0
  for (i = 0, c = 0; c < max_chars && src[i] != '\0'; ) {
35
0
    int len = uni_utf8_get_char(src+i, &chr);
36
0
    if (len <= 0)
37
0
      break;
38
0
    if (i_iscntrl(src[i]))
39
0
      break;
40
0
    c++;
41
0
    i += len;
42
0
  }
43
0
  i_assert(c <= max_chars);
44
0
  return i;
45
0
}
46
47
/* Shortens dest by (at least) one byte, never touching anything before
   initial_pos. Only the part of dest starting at initial_pos is handed to
   uni_utf8_data_truncate(), with a size limit of one byte less than its
   current length; presumably that helper rounds the cut down to a UTF-8
   character boundary (NOTE(review): confirm against unichar.h). Does
   nothing when dest holds no data past initial_pos.

   initial_pos is a size_t: the caller passes a value obtained from
   str_len(), and narrowing it to unsigned int could make str_truncate()
   cut into data that was in dest before sanitizing started. */
static void str_sanitize_truncate_char(string_t *dest, size_t initial_pos)
{
	const unsigned char *data = str_data(dest);
	size_t len = str_len(dest);

	i_assert(len >= initial_pos);
	if (len == initial_pos)
		return;

	/* look only at the bytes appended after initial_pos */
	data += initial_pos;
	len -= initial_pos;
	str_truncate(dest, initial_pos +
		     uni_utf8_data_truncate(data, len, len-1));
}
61
62
/* Appends a sanitized copy of src to dest, reading at most max_bytes bytes
   of src. Bytes that do not decode as UTF-8 and characters whose lead byte
   is a control character are each replaced with '?'. If src was not
   consumed up to its NUL terminator, the appended text is shortened until
   it is at most max_bytes-3 bytes long and "..." is added; when max_bytes
   is below 3 the appended text is dropped entirely and only "..." is
   added. Data already in dest before the call is left alone. */
void str_sanitize_append(string_t *dest, const char *src, size_t max_bytes)
{
	const size_t start_pos = str_len(dest);
	unichar_t chr;
	size_t pos = 0;

	while (pos < max_bytes && src[pos] != '\0') {
		int char_len =
			uni_utf8_get_char_n(src + pos, max_bytes - pos, &chr);

		if (char_len == 0) {
			/* input ended too early */
			break;
		}
		if (char_len < 0) {
			/* invalid UTF-8: replace this byte and retry from
			   the next one */
			str_append_c(dest, '?');
			pos++;
		} else {
			if (i_iscntrl(src[pos]))
				str_append_c(dest, '?');
			else
				str_append_data(dest, src + pos, char_len);
			pos += char_len;
		}
	}

	if (src[pos] == '\0') {
		/* all of src was consumed, nothing to mark as cut */
		return;
	}

	if (max_bytes < 3) {
		/* no room for any text in front of the "..." marker */
		str_truncate(dest, start_pos);
	} else {
		/* drop trailing characters until the marker fits */
		while (str_len(dest) - start_pos > max_bytes - 3)
			str_sanitize_truncate_char(dest, start_pos);
	}
	str_append(dest, "...");
}
96
97
/* Appends a sanitized copy of src to dest, emitting at most max_cps code
   points. Bytes that do not decode as UTF-8 and characters whose lead byte
   is a control character are each replaced with
   UNICODE_REPLACEMENT_CHAR_UTF8. If src was not consumed up to its NUL
   terminator, the last emitted character is removed again and
   UNICODE_HORIZONTAL_ELLIPSIS_CHAR_UTF8 is appended in its place, so the
   ellipsis counts towards max_cps. max_cps must be greater than zero. */
void str_sanitize_append_utf8(string_t *dest, const char *src,
			      uintmax_t max_cps)
{
	/* Start of the most recently emitted character. Begins at the
	   current end of dest so that a truncation can never remove data
	   that was in dest before this call. */
	size_t last_pos = str_len(dest);
	unichar_t chr;
	uintmax_t c;
	size_t i;

	i_assert(max_cps > 0);

	for (i = 0, c = 0; c < max_cps && src[i] != '\0'; ) {
		int len = uni_utf8_get_char(src+i, &chr);
		if (len == 0)
			break; /* input ended too early */

		last_pos = str_len(dest);
		if (len < 0) {
			/* invalid UTF-8: consume one byte. The replacement
			   character is an emitted code point, so it has to
			   count against max_cps; otherwise input made of
			   invalid bytes would produce unbounded output. */
			str_append(dest, UNICODE_REPLACEMENT_CHAR_UTF8);
			i++;
			c++;
			continue;
		}
		if (i_iscntrl(src[i]))
			str_append(dest, UNICODE_REPLACEMENT_CHAR_UTF8);
		else
			str_append_data(dest, src+i, len);
		i += len;
		c++;
	}

	if (src[i] != '\0') {
		/* src was cut short: swap the last emitted character for
		   an ellipsis */
		str_truncate(dest, last_pos);
		str_append(dest, UNICODE_HORIZONTAL_ELLIPSIS_CHAR_UTF8);
	}
}
132
133
const char *str_sanitize(const char *src, size_t max_bytes)
134
12.3k
{
135
12.3k
  string_t *str;
136
12.3k
  size_t i;
137
138
12.3k
  if (src == NULL)
139
0
    return NULL;
140
141
12.3k
  i = str_sanitize_skip_start(src, max_bytes);
142
12.3k
  if (src[i] == '\0')
143
10.8k
    return src;
144
145
1.51k
  str = t_str_new(I_MIN(max_bytes, 256));
146
1.51k
  str_sanitize_append(str, src, max_bytes);
147
1.51k
  return str_c(str);
148
12.3k
}
149
150
const char *str_sanitize_utf8(const char *src, uintmax_t max_cps)
151
0
{
152
0
  string_t *str;
153
0
  size_t i;
154
155
0
  if (src == NULL)
156
0
    return NULL;
157
158
0
  i = str_sanitize_skip_start_utf8(src, max_cps);
159
0
  if (src[i] == '\0')
160
0
    return src;
161
162
0
  str = t_str_new(I_MIN(max_cps, 256));
163
0
  str_sanitize_append_utf8(str, src, max_cps);
164
0
  return str_c(str);
165
0
}
166