Coverage Report

Created: 2025-07-23 07:04

/src/samba/lib/ldb/common/ldb_utf8.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
   ldb database library
3
4
   Copyright (C) Andrew Tridgell  2004
5
6
     ** NOTE! The following LGPL license applies to the ldb
7
     ** library. This does NOT imply that all of Samba is released
8
     ** under the LGPL
9
10
   This library is free software; you can redistribute it and/or
11
   modify it under the terms of the GNU Lesser General Public
12
   License as published by the Free Software Foundation; either
13
   version 3 of the License, or (at your option) any later version.
14
15
   This library is distributed in the hope that it will be useful,
16
   but WITHOUT ANY WARRANTY; without even the implied warranty of
17
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18
   Lesser General Public License for more details.
19
20
   You should have received a copy of the GNU Lesser General Public
21
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
22
*/
23
24
/*
25
 *  Name: ldb
26
 *
27
 *  Component: ldb utf8 handling
28
 *
29
 *  Description: case folding and case comparison for UTF8 strings
30
 *
31
 *  Author: Andrew Tridgell
32
 */
33
34
#include "ldb_private.h"
35
#include "system/locale.h"
36
37
/*
38
 * Set functions for comparing and case-folding case-insensitive ldb val
39
 * strings.
40
 */
41
void ldb_set_utf8_functions(struct ldb_context *ldb,
42
          void *context,
43
          char *(*casefold)(void *, void *, const char *, size_t),
44
          int (*casecmp)(void *ctx,
45
             const struct ldb_val *v1,
46
             const struct ldb_val *v2))
47
0
{
48
0
  if (context) {
49
0
    ldb->utf8_fns.context = context;
50
0
  }
51
0
  if (casefold) {
52
0
    ldb->utf8_fns.casefold = casefold;
53
0
  }
54
0
  if (casecmp) {
55
0
    ldb->utf8_fns.casecmp = casecmp;
56
0
  }
57
0
}
58
59
/*
 * Let the caller supply a case-folding callback (and its context) for
 * UTF-8 aware handling.
 *
 * Only the context and casefold hooks are offered here; NULL is passed
 * for the comparison hook, which ldb_set_utf8_functions() treats as
 * "leave the current comparison function in place".
 */
void ldb_set_utf8_fns(struct ldb_context *ldb,
		      void *context,
		      char *(*casefold)(void *, void *, const char *, size_t))
{
	ldb_set_utf8_functions(ldb,
			       context,
			       casefold,
			       NULL /* casecmp: unchanged */);
}
69
70
71
/*
72
  a simple case folding function
73
  NOTE: does not handle UTF8
74
*/
75
char *ldb_casefold_default(void *context, TALLOC_CTX *mem_ctx, const char *s, size_t n)
76
0
{
77
0
  size_t i;
78
0
  char *ret = talloc_strndup(mem_ctx, s, n);
79
0
  if (!s) {
80
0
    errno = ENOMEM;
81
0
    return NULL;
82
0
  }
83
0
  for (i=0;ret[i];i++) {
84
0
    ret[i] = ldb_ascii_toupper(ret[i]);
85
0
  }
86
0
  return ret;
87
0
}
88
89
90
/*
91
 * The default comparison fold function only knows ASCII. Multiple
92
 * spaces (0x20) are collapsed into one, and [a-z] map to [A-Z]. All
93
 * other bytes are compared without casefolding.
94
 *
95
 * Note that as well as not handling UTF-8, this function does not exactly
96
 * implement RFC 4518 (2.6.1. Insignificant Space Handling and Appendix B).
97
 */
98
99
int ldb_comparison_fold_ascii(void *ignored,
100
            const struct ldb_val *v1,
101
            const struct ldb_val *v2)
102
0
{
103
0
  const uint8_t *s1 = v1->data;
104
0
  const uint8_t *s2 = v2->data;
105
0
  size_t n1 = v1->length, n2 = v2->length;
106
107
0
  while (n1 && *s1 == ' ') { s1++; n1--; };
108
0
  while (n2 && *s2 == ' ') { s2++; n2--; };
109
110
0
  while (n1 && n2 && *s1 && *s2) {
111
0
    if (ldb_ascii_toupper(*s1) != ldb_ascii_toupper(*s2)) {
112
0
      break;
113
0
    }
114
0
    if (*s1 == ' ') {
115
0
      while (n1 > 1 && s1[0] == s1[1]) { s1++; n1--; }
116
0
      while (n2 > 1 && s2[0] == s2[1]) { s2++; n2--; }
117
0
    }
118
0
    s1++; s2++;
119
0
    n1--; n2--;
120
0
  }
121
122
  /* check for trailing spaces only if the other pointers has
123
   * reached the end of the strings otherwise we can
124
   * mistakenly match.  ex. "domain users" <->
125
   * "domainUpdates"
126
   */
127
0
  if (n1 && *s1 == ' ' && (!n2 || !*s2)) {
128
0
    while (n1 && *s1 == ' ') { s1++; n1--; }
129
0
  }
130
0
  if (n2 && *s2 == ' ' && (!n1 || !*s1)) {
131
0
    while (n2 && *s2 == ' ') { s2++; n2--; }
132
0
  }
133
0
  if (n1 == 0 && n2 != 0) {
134
0
    return *s2 ? -1 : 0;
135
0
  }
136
0
  if (n2 == 0 && n1 != 0) {
137
0
    return *s1 ? 1 : 0;
138
0
  }
139
0
  if (n1 == 0 && n2 == 0) {
140
0
    return 0;
141
0
  }
142
0
  return NUMERIC_CMP(*s1, *s2);
143
0
}
144
145
void ldb_set_utf8_default(struct ldb_context *ldb)
146
0
{
147
0
  ldb_set_utf8_functions(ldb, NULL,
148
0
        ldb_casefold_default,
149
0
        ldb_comparison_fold_ascii);
150
0
}
151
152
/*
 * Case-fold the first n bytes of s using whichever casefold callback is
 * currently installed on ldb, handing it the registered context.
 *
 * The result is whatever the callback returns; it is given mem_ctx to
 * allocate on.
 */
char *ldb_casefold(struct ldb_context *ldb, TALLOC_CTX *mem_ctx, const char *s, size_t n)
{
	void *fold_ctx = ldb->utf8_fns.context;

	return ldb->utf8_fns.casefold(fold_ctx, mem_ctx, s, n);
}
156
157
/*
 * Check that the attribute name is valid according to rfc2251.
 *
 * Accepted names are:
 *   - the single character "*" (special ldb_tdb wildcard), or
 *   - an ASCII letter or '@' (ldb's special identifier prefix)
 *     followed by any number of ASCII letters, digits or '-'.
 *
 * The classification is done with explicit ASCII ranges rather than
 * isascii()/isalpha()/isalnum(): isascii() is not part of ISO C, and
 * the <ctype.h> classifiers depend on the current locale, whereas
 * attribute names are plain ASCII whatever the locale is.
 *
 * Returns 1 if the name is ok, 0 otherwise (including NULL or "").
 */

int ldb_valid_attr_name(const char *s)
{
	size_t i;

	if (s == NULL || s[0] == '\0') {
		return 0;
	}

	/* handle special ldb_tdb wildcard */
	if (strcmp(s, "*") == 0) {
		return 1;
	}

	for (i = 0; s[i] != '\0'; i++) {
		const char c = s[i];
		/*
		 * Bytes outside 7-bit ASCII fail every range test below,
		 * whether plain char is signed or unsigned.
		 */
		const int is_alpha = ('a' <= c && c <= 'z') ||
				     ('A' <= c && c <= 'Z');
		const int is_digit = ('0' <= c && c <= '9');

		if (i == 0) {
			/* first char must be an alpha (or our special '@' identifier) */
			if (!is_alpha && c != '@') {
				return 0;
			}
		} else {
			if (!is_alpha && !is_digit && c != '-') {
				return 0;
			}
		}
	}
	return 1;
}
188
189
/*
 * Return an upper-cased copy of the attribute name s, allocated on
 * mem_ctx. Only ASCII [a-z] is changed (see ldb_ascii_toupper()).
 *
 * Returns NULL with errno set to ENOMEM if talloc_strdup() fails.
 */
char *ldb_attr_casefold(TALLOC_CTX *mem_ctx, const char *s)
{
	char *folded = talloc_strdup(mem_ctx, s);
	char *p;

	if (folded == NULL) {
		errno = ENOMEM;
		return NULL;
	}

	/* Fold the copy in place, stopping at its terminating NUL. */
	for (p = folded; *p != '\0'; p++) {
		*p = ldb_ascii_toupper(*p);
	}

	return folded;
}
202
203
/*
 * Test whether attr names the distinguished name: both 'dn' and
 * 'distinguishedName' are accepted, compared with ldb_attr_cmp().
 *
 * Returns 0 when attr is one of the two names and -1 otherwise (note
 * that 0 means "match", as with a comparison function).
 */
int ldb_attr_dn(const char *attr)
{
	if (ldb_attr_cmp(attr, "dn") != 0 &&
	    ldb_attr_cmp(attr, "distinguishedName") != 0) {
		return -1;
	}

	return 0;
}
214
215
0
_PRIVATE_ char ldb_ascii_toupper(char c) {
	/*
	 * This deliberately behaves like a 1970s C-locale toupper():
	 * only the 7-bit letters 'a'..'z' are mapped, and the current
	 * locale is never consulted.
	 *
	 * The reason is parsing safety. In some locales the "i" in
	 * "<guid=" would be upper-cased to "İ", and the result could no
	 * longer be parsed. The price is paid when handling real text:
	 * a search for the name "Ali" would have to be written "Alİ"
	 * to match.
	 */
	if (c < 'a' || c > 'z') {
		/* Not an ASCII lower-case letter: leave it alone. */
		return c;
	}

	/* In ASCII, upper and lower case differ only in bit 0x20. */
	return c ^ 0x20;
}