Coverage Report

Created: 2025-11-09 06:44

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/dovecot/src/lib/str-sanitize.c
Line
Count
Source
1
/* Copyright (c) 2004-2018 Dovecot authors, see the included COPYING file */
2
3
#include "lib.h"
4
#include "unichar.h"
5
#include "str.h"
6
#include "str-sanitize.h"
7
#include <ctype.h>
8
9
static size_t str_sanitize_skip_start(const char *src, size_t max_bytes)
10
36.2k
{
11
36.2k
  unichar_t chr;
12
36.2k
  size_t i;
13
14
486k
  for (i = 0; i < max_bytes && src[i] != '\0'; ) {
15
455k
    int len = uni_utf8_get_char_n(src+i, max_bytes-i, &chr);
16
455k
    if (len <= 0)
17
3.77k
      break;
18
451k
    if (i_iscntrl(src[i]))
19
1.21k
      break;
20
450k
    i += len;
21
450k
  }
22
36.2k
  i_assert(i <= max_bytes);
23
36.2k
  return i;
24
36.2k
}
25
26
27
static size_t
28
str_sanitize_skip_start_utf8(const char *src, uintmax_t max_chars)
29
0
{
30
0
  unichar_t chr;
31
0
  uintmax_t c;
32
0
  size_t i;
33
34
0
  for (i = 0, c = 0; c < max_chars && src[i] != '\0'; ) {
35
0
    int len = uni_utf8_get_char(src+i, &chr);
36
0
    if (len <= 0)
37
0
      break;
38
0
    if (i_iscntrl(src[i]))
39
0
      break;
40
0
    c++;
41
0
    i += len;
42
0
  }
43
0
  i_assert(c <= max_chars);
44
0
  return i;
45
0
}
46
47
/* Shorten dest by truncating the data that follows initial_pos to at most
   one byte less than its current length. The new length is whatever
   uni_utf8_data_truncate() returns for that limit (presumably it backs up to
   a UTF-8 character boundary - confirm against unichar.h). Bytes before
   initial_pos are never touched; if nothing follows initial_pos the call is
   a no-op.

   initial_pos is a size_t so that the caller's size_t offset is not
   silently narrowed on platforms where unsigned int is smaller. */
static void str_sanitize_truncate_char(string_t *dest, size_t initial_pos)
{
  const unsigned char *data = str_data(dest);
  size_t len = str_len(dest);

  i_assert(len >= initial_pos);
  if (len == initial_pos)
    return;

  /* only look at the part this module appended */
  data += initial_pos;
  len -= initial_pos;
  str_truncate(dest, initial_pos +
    uni_utf8_data_truncate(data, len, len-1));
}
61
62
/* Append a sanitized copy of src to dest, reading at most max_bytes bytes of
   src. Every byte that starts an invalid UTF-8 sequence and every character
   whose first byte satisfies i_iscntrl() is written as '?'; all other
   characters are copied unchanged. If src was not consumed up to its
   terminating NUL, the appended part is cut down to at most max_bytes-3
   bytes and "..." is added. With max_bytes < 3 the appended part is dropped
   completely, so only "..." is added. */
void str_sanitize_append(string_t *dest, const char *src, size_t max_bytes)
{
  const size_t start_pos = str_len(dest);
  size_t pos = 0;

  while (pos < max_bytes && src[pos] != '\0') {
    unichar_t chr;
    int char_len = uni_utf8_get_char_n(&src[pos], max_bytes - pos, &chr);

    if (char_len == 0) {
      /* input ended too early */
      break;
    }

    if (char_len < 0) {
      /* invalid UTF-8: replace this single byte and retry from the
         next one */
      str_append_c(dest, '?');
      pos++;
    } else if (i_iscntrl(src[pos])) {
      str_append_c(dest, '?');
      pos += (size_t)char_len;
    } else {
      str_append_data(dest, &src[pos], char_len);
      pos += (size_t)char_len;
    }
  }

  if (src[pos] == '\0') {
    /* everything fit, no truncation marker needed */
    return;
  }

  if (max_bytes < 3) {
    /* no room for any text next to the marker */
    str_truncate(dest, start_pos);
  } else {
    const size_t keep_max = max_bytes - 3;

    /* drop data from the end until the marker fits */
    while (str_len(dest) - start_pos > keep_max)
      str_sanitize_truncate_char(dest, start_pos);
  }
  str_append(dest, "...");
}
96
97
/* Append a sanitized copy of src to dest, processing at most max_cps
   characters of src (max_cps must be > 0). Every byte that starts an invalid
   UTF-8 sequence and every character whose first byte satisfies i_iscntrl()
   is written as UNICODE_REPLACEMENT_CHAR_UTF8; all other characters are
   copied unchanged. If src was not consumed up to its terminating NUL, the
   most recently appended character is removed again and
   UNICODE_HORIZONTAL_ELLIPSIS_CHAR_UTF8 is appended in its place.

   Data that dest held before the call is never modified. */
void str_sanitize_append_utf8(string_t *dest, const char *src,
            uintmax_t max_cps)
{
  /* Start at the current end of dest, not at 0: if the very first sequence
     of src is cut short, the loop breaks before last_pos is updated and the
     truncation below must not erase what dest already contained. */
  size_t last_pos = str_len(dest);
  unichar_t chr;
  uintmax_t c;
  size_t i;

  i_assert(max_cps > 0);

  for (i = 0, c = 0; c < max_cps && src[i] != '\0'; ) {
    int len = uni_utf8_get_char(src+i, &chr);
    if (len == 0)
      break; /* input ended too early */

    /* remember where this character starts in dest, so that it can be
       replaced by the ellipsis if src turns out to be too long */
    last_pos = str_len(dest);
    if (len < 0) {
      /* invalid UTF-8: replace this single byte and retry from the
         next one; it is not counted against max_cps */
      str_append(dest, UNICODE_REPLACEMENT_CHAR_UTF8);
      i++;
      continue;
    }
    if (i_iscntrl(src[i]))
      str_append(dest, UNICODE_REPLACEMENT_CHAR_UTF8);
    else
      str_append_data(dest, src+i, len);
    i += len;
    c++;
  }

  if (src[i] != '\0') {
    /* src was not fully consumed: swap the last character for the
       ellipsis */
    str_truncate(dest, last_pos);
    str_append(dest, UNICODE_HORIZONTAL_ELLIPSIS_CHAR_UTF8);
  }
}
132
133
const char *str_sanitize(const char *src, size_t max_bytes)
134
36.2k
{
135
36.2k
  string_t *str;
136
36.2k
  size_t i;
137
138
36.2k
  if (src == NULL)
139
0
    return NULL;
140
141
36.2k
  i = str_sanitize_skip_start(src, max_bytes);
142
36.2k
  if (src[i] == '\0')
143
29.4k
    return src;
144
145
6.81k
  str = t_str_new(I_MIN(max_bytes, 256));
146
6.81k
  str_sanitize_append(str, src, max_bytes);
147
6.81k
  return str_c(str);
148
36.2k
}
149
150
const char *str_sanitize_utf8(const char *src, uintmax_t max_cps)
151
0
{
152
0
  string_t *str;
153
0
  size_t i;
154
155
0
  if (src == NULL)
156
0
    return NULL;
157
158
0
  i = str_sanitize_skip_start_utf8(src, max_cps);
159
0
  if (src[i] == '\0')
160
0
    return src;
161
162
0
  str = t_str_new(I_MIN(max_cps, 256));
163
0
  str_sanitize_append_utf8(str, src, max_cps);
164
0
  return str_c(str);
165
0
}
166