Coverage Report

Created: 2026-02-11 06:31

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/hoextdown/src/escape.c
Line
Count
Source
1
#include "escape.h"
2
3
#include <assert.h>
4
#include <stdio.h>
5
#include <string.h>
6
7
8
30.1M
#define likely(x)       __builtin_expect((x),1)
9
#define unlikely(x)     __builtin_expect((x),0)
10
11
12
/*
13
 * The following characters will not be escaped:
14
 *
15
 *    -_.+!*(),%#@?=;:/$ alphanum
16
 *
17
 * Note that this character set is the addition of:
18
 *
19
 *  - The characters which are safe to be in an URL
20
 *  - The characters which are *not* safe to be in
21
 *  an URL because they are RESERVED characters.
22
 *
23
 * We assume (lazily) that any RESERVED char that
24
 * appears inside an URL is actually meant to
25
 * have its native function (i.e. as an URL
26
 * component/separator) and hence needs no escaping.
27
 *
28
 * There are two exceptions: the characters & (amp)
29
 * and ' (single quote) do not appear in the table.
30
 * They are meant to appear in the URL as components,
31
 * yet they require special HTML-entity escaping
32
 * to generate valid HTML markup.
33
 *
34
 * All other characters will be escaped to %XX.
35
 *
36
 */
37
static const uint8_t HREF_SAFE[UINT8_MAX+1] = {
38
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
39
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
40
  0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
41
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
42
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
44
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
46
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
47
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
48
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
51
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
52
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
53
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
54
};
55
56
void
57
hoedown_escape_href(hoedown_buffer *ob, const uint8_t *data, size_t size)
58
258k
{
59
258k
  static const char hex_chars[] = "0123456789ABCDEF";
60
258k
  size_t  i = 0, mark;
61
258k
  char hex_str[3];
62
63
258k
  hex_str[0] = '%';
64
65
6.45M
  while (i < size) {
66
6.29M
    mark = i;
67
19.0M
    while (i < size && HREF_SAFE[data[i]]) i++;
68
69
    /* Optimization for cases where there's nothing to escape */
70
6.29M
    if (mark == 0 && i >= size) {
71
32.6k
      hoedown_buffer_put(ob, data, size);
72
32.6k
      return;
73
32.6k
    }
74
75
6.25M
    if (likely(i > mark)) {
76
219k
      hoedown_buffer_put(ob, data + mark, i - mark);
77
219k
    }
78
79
    /* escaping */
80
6.25M
    if (i >= size)
81
65.0k
      break;
82
83
6.19M
    switch (data[i]) {
84
    /* amp appears all the time in URLs, but needs
85
     * HTML-entity escaping to be inside an href */
86
10.2k
    case '&':
87
10.2k
      HOEDOWN_BUFPUTSL(ob, "&amp;");
88
10.2k
      break;
89
90
    /* the single quote is a valid URL character
91
     * according to the standard; it needs HTML
92
     * entity escaping too */
93
9.57k
    case '\'':
94
9.57k
      HOEDOWN_BUFPUTSL(ob, "&#x27;");
95
9.57k
      break;
96
97
    /* the space can be escaped to %20 or a plus
98
     * sign. we're going with the generic escape
99
     * for now. the plus thing is more commonly seen
100
     * when building GET strings */
101
#if 0
102
    case ' ':
103
      hoedown_buffer_putc(ob, '+');
104
      break;
105
#endif
106
107
    /* every other character goes with a %XX escaping */
108
6.17M
    default:
109
6.17M
      hex_str[1] = hex_chars[(data[i] >> 4) & 0xF];
110
6.17M
      hex_str[2] = hex_chars[data[i] & 0xF];
111
6.17M
      hoedown_buffer_put(ob, (uint8_t *)hex_str, 3);
112
6.19M
    }
113
114
6.19M
    i++;
115
6.19M
  }
116
258k
}
117
118
119
/**
120
 * According to the OWASP rules:
121
 *
122
 * & --> &amp;
123
 * < --> &lt;
124
 * > --> &gt;
125
 * " --> &quot;
126
 * ' --> &#x27;     &apos; is not recommended (emitted here as the equivalent &#39;)
127
 * / --> &#x2F;     forward slash is included as it helps end an HTML entity (emitted here as the equivalent &#47;)
128
 *
129
 */
130
static const uint8_t HTML_ESCAPE_TABLE[UINT8_MAX+1] = {
131
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
132
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
133
  0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
134
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
135
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
136
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
137
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
138
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
139
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
140
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
141
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
142
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
143
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
144
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
145
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
146
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
147
};
148
149
static const char *HTML_ESCAPES[] = {
150
        "",
151
        "&quot;",
152
        "&amp;",
153
        "&#39;",
154
        "&#47;",
155
        "&lt;",
156
        "&gt;"
157
};
158
159
void
160
hoedown_escape_html(hoedown_buffer *ob, const uint8_t *data, size_t size, int secure)
161
7.26M
{
162
7.26M
  size_t i = 0, mark;
163
164
30.2M
  while (1) {
165
30.2M
    mark = i;
166
1.02G
    while (i < size && HTML_ESCAPE_TABLE[data[i]] == 0) i++;
167
168
    /* Optimization for cases where there's nothing to escape */
169
30.2M
    if (mark == 0 && i >= size) {
170
6.33M
      hoedown_buffer_put(ob, data, size);
171
6.33M
      return;
172
6.33M
    }
173
174
23.9M
    if (likely(i > mark))
175
11.7M
      hoedown_buffer_put(ob, data + mark, i - mark);
176
177
23.9M
    if (i >= size) break;
178
179
    /* The forward slash is only escaped in secure mode */
180
22.9M
    if (!secure && data[i] == '/') {
181
296k
      hoedown_buffer_putc(ob, '/');
182
22.7M
    } else {
183
22.7M
      hoedown_buffer_puts(ob, HTML_ESCAPES[HTML_ESCAPE_TABLE[data[i]]]);
184
22.7M
    }
185
186
22.9M
    i++;
187
22.9M
  }
188
7.26M
}