Coverage Report

Created: 2026-02-14 07:07

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/samba/lib/util/rfc1738.c
Line
Count
Source
1
/*
2
 * Functions for RFC 3986 percent-encoding.
3
 *
4
 * NOTE:
5
 *
6
 * This file was originally imported from the Squid project but has been
7
 * significantly altered. The licence below is reproduced intact, but refers
8
 * to files in Squid's repository, not in Samba. See COPYING for the GPLv3
9
 * notice (being the later version mentioned below).
10
 */
11
12
/*
13
 * $Id$
14
 *
15
 * DEBUG:
16
 * AUTHOR: Harvest Derived
17
 *
18
 * SQUID Web Proxy Cache          http://www.squid-cache.org/
19
 * ----------------------------------------------------------
20
 *
21
 *  Squid is the result of efforts by numerous individuals from
22
 *  the Internet community; see the CONTRIBUTORS file for full
23
 *  details.   Many organizations have provided support for Squid's
24
 *  development; see the SPONSORS file for full details.  Squid is
25
 *  Copyrighted (C) 2001 by the Regents of the University of
26
 *  California; see the COPYRIGHT file for full details.  Squid
27
 *  incorporates software developed and/or copyrighted by other
28
 *  sources; see the CREDITS file for full details.
29
 *
30
 *  This program is free software; you can redistribute it and/or modify
31
 *  it under the terms of the GNU General Public License as published by
32
 *  the Free Software Foundation; either version 2 of the License, or
33
 *  (at your option) any later version.
34
 *
35
 *  This program is distributed in the hope that it will be useful,
36
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
37
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
38
 *  GNU General Public License for more details.
39
 *
40
 *  You should have received a copy of the GNU General Public License
41
 *  along with this program; if not, write to the Free Software
42
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
43
 *
44
 */
45
46
#include "replace.h"
47
#include <talloc.h>
48
#include "lib/util/samba_util.h"
49
50
0
/*
 * Bit flags stored in escapees[]; they are OR-ed together to build the mask
 * that selects which character classes get percent-encoded.
 */
#define RFC1738_ENCODE 1	/* "unsafe": escaped in canonical form only */
#define RFC1738_RESERVED 2	/* member of the RFC 3986 reserved set */

/*
 * According to RFC 1738, "$-_.+!*'()," are not reserved or unsafe, but as
 * that has been obsolete since 2004, we aim instead for RFC 3986, where:
 *
 *  reserved =    : / ? # [ ] @ ! $ & ' ( ) * + , ; =
 *  unreserved = ALPHA DIGIT - . _ ~
 *
 * and whatever is in neither of those sets is what RFC 1738 called "unsafe",
 * meaning that it is canonically but not mandatorily escaped.
 *
 * Characters below 0x20 or above 0x7E are always encoded; they are handled
 * by the caller and need no entry here (the table stops at index 126).
 * Unlisted entries are zero, i.e. never escaped on account of this table.
 */

static const unsigned char escapees[127] = {
  /* "unsafe": printable, but neither reserved nor unreserved */
  [' '] = RFC1738_ENCODE,  ['"'] = RFC1738_ENCODE,  ['%'] = RFC1738_ENCODE,
  ['<'] = RFC1738_ENCODE,  ['>'] = RFC1738_ENCODE,  ['\\'] = RFC1738_ENCODE,
  ['^'] = RFC1738_ENCODE,  ['`'] = RFC1738_ENCODE,  ['{'] = RFC1738_ENCODE,
  ['|'] = RFC1738_ENCODE,  ['}'] = RFC1738_ENCODE,

  /* reserved, general delimiters: : / ? # [ ] @ */
  [':'] = RFC1738_RESERVED,  ['/'] = RFC1738_RESERVED,
  ['?'] = RFC1738_RESERVED,  ['#'] = RFC1738_RESERVED,
  ['['] = RFC1738_RESERVED,  [']'] = RFC1738_RESERVED,
  ['@'] = RFC1738_RESERVED,

  /* reserved, sub-delimiters: ! $ & ' ( ) * + , ; = */
  ['!'] = RFC1738_RESERVED,  ['$'] = RFC1738_RESERVED,
  ['&'] = RFC1738_RESERVED,  ['\''] = RFC1738_RESERVED,
  ['('] = RFC1738_RESERVED,  [')'] = RFC1738_RESERVED,
  ['*'] = RFC1738_RESERVED,  ['+'] = RFC1738_RESERVED,
  [','] = RFC1738_RESERVED,  [';'] = RFC1738_RESERVED,
  ['='] = RFC1738_RESERVED,
};
98
99
/*
100
 *  rfc1738_do_escape - fills a preallocated buffer with an escaped version of
101
 *  the given string.
102
 *
103
 *  For canonical escaping, mask should be RFC1738_ENCODE | RFC1738_RESERVED.
104
 *  For mandatory escaping, mask should be RFC1738_RESERVED.
105
 */
106
static char *
107
rfc1738_do_escape(char *buf, size_t bufsize,
108
      const char *url, size_t len, unsigned char mask)
109
0
{
110
0
  size_t i;
111
0
  size_t j = 0;
112
0
  for (i = 0; i < len; i++) {
113
0
    unsigned int c = (unsigned char) url[i];
114
0
    if (c > 126 || c < 32 || (escapees[c] & mask)) {
115
0
      if (j + 3 >= bufsize) {
116
0
        return NULL;
117
0
      }
118
0
      (void) snprintf(&buf[j], 4, "%%%02X", c);
119
0
      j += 3;
120
0
    } else {
121
0
      if (j + 1 >= bufsize) {
122
0
        return NULL;
123
0
      }
124
0
      buf[j] = c;
125
0
      j++;
126
0
    }
127
0
  }
128
0
  buf[j] = '\0';
129
0
  return buf;
130
0
}
131
132
/*
133
 * rfc1738_escape_part - Returns a talloced buffer that contains the RFC 3986
134
 * compliant, escaped version of the given url segment.
135
 */
136
char *
137
rfc1738_escape_part(TALLOC_CTX *mem_ctx, const char *url)
138
0
{
139
0
  size_t bufsize = 0;
140
0
  char *buf = NULL;
141
142
0
  size_t len = strlen(url);
143
0
  if (len >= SIZE_MAX / 3) {
144
0
    return NULL;
145
0
  }
146
147
0
  bufsize = len * 3 + 1;
148
0
  buf = talloc_array(mem_ctx, char, bufsize);
149
0
  if (buf == NULL) {
150
0
    return NULL;
151
0
  }
152
153
0
  talloc_set_name_const(buf, buf);
154
155
0
  return rfc1738_do_escape(buf, bufsize, url, len,
156
0
         RFC1738_ENCODE | RFC1738_RESERVED);
157
0
}
158
159
/*
160
 * rfc1738_unescape() - Converts url-escaped characters in the string.
161
 *
162
 * The two characters following a '%' in a string should be hex digits that
163
 * describe an encoded byte. For example, "%25" is hex 0x25 or '%' in ASCII;
164
 * this is the only way to include a % in the unescaped string. Any character
165
 * can be escaped, including plain letters (e.g. "%61" for "a"). Anything
166
 * other than 2 hex characters following the % is an error.
167
 *
168
 * The conversion is done in-place, which is always safe as unescapes can only
169
 * shorten the string.
170
 *
171
 * Returns a pointer to the end of the string (that is, the '\0' byte), or
172
 * NULL on error, at which point s is in an undefined state.
173
 *
174
 * Note that after `char *e = rfc_unescape(s)`, `strlen(s)` will not equal
175
 * `e - s` if s originally contained "%00". You might want to check for this.
176
 */
177
178
_PUBLIC_ char *rfc1738_unescape(char *s)
179
0
{
180
0
  size_t i, j;      /* i is write, j is read */
181
0
  for (i = 0, j = 0; s[j] != '\0'; i++, j++) {
182
0
    if (s[j] == '%') {
183
0
      uint8_t v;
184
0
      bool ok;
185
186
0
      ok = hex_byte(&s[j+1], &v);
187
0
      if (!ok) {
188
0
        return NULL;
189
0
      }
190
0
      j += 2; /* OK; hex_byte() has checked ahead */
191
0
      s[i] = (unsigned char)v;
192
0
    } else {
193
0
      s[i] = s[j];
194
0
    }
195
0
  }
196
0
  s[i] = '\0';
197
0
  return s + i;
198
0
}