/src/samba/lib/ldb/common/ldb_utf8.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | ldb database library |
3 | | |
4 | | Copyright (C) Andrew Tridgell 2004 |
5 | | |
6 | | ** NOTE! The following LGPL license applies to the ldb |
7 | | ** library. This does NOT imply that all of Samba is released |
8 | | ** under the LGPL |
9 | | |
10 | | This library is free software; you can redistribute it and/or |
11 | | modify it under the terms of the GNU Lesser General Public |
12 | | License as published by the Free Software Foundation; either |
13 | | version 3 of the License, or (at your option) any later version. |
14 | | |
15 | | This library is distributed in the hope that it will be useful, |
16 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
17 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
18 | | Lesser General Public License for more details. |
19 | | |
20 | | You should have received a copy of the GNU Lesser General Public |
21 | | License along with this library; if not, see <http://www.gnu.org/licenses/>. |
22 | | */ |
23 | | |
24 | | /* |
25 | | * Name: ldb |
26 | | * |
27 | | * Component: ldb utf8 handling |
28 | | * |
29 | | * Description: case folding and case comparison for UTF8 strings |
30 | | * |
31 | | * Author: Andrew Tridgell |
32 | | */ |
33 | | |
34 | | #include "ldb_private.h" |
35 | | #include "system/locale.h" |
36 | | |
37 | | /* |
38 | | * Set functions for comparing and case-folding case-insensitive ldb val |
39 | | * strings. |
40 | | */ |
41 | | void ldb_set_utf8_functions(struct ldb_context *ldb, |
42 | | void *context, |
43 | | char *(*casefold)(void *, void *, const char *, size_t), |
44 | | int (*casecmp)(void *ctx, |
45 | | const struct ldb_val *v1, |
46 | | const struct ldb_val *v2)) |
47 | 0 | { |
48 | 0 | if (context) { |
49 | 0 | ldb->utf8_fns.context = context; |
50 | 0 | } |
51 | 0 | if (casefold) { |
52 | 0 | ldb->utf8_fns.casefold = casefold; |
53 | 0 | } |
54 | 0 | if (casecmp) { |
55 | 0 | ldb->utf8_fns.casecmp = casecmp; |
56 | 0 | } |
57 | 0 | } |
58 | | |
/*
 * Let the user supply a case-folding function (and, optionally, the
 * context pointer handed to it) for caseless handling of utf8 strings.
 *
 * No comparison function is passed on, so whichever one is currently
 * set on ldb stays in place.
 */
void ldb_set_utf8_fns(struct ldb_context *ldb,
		      void *context,
		      char *(*casefold)(void *, void *, const char *, size_t))
{
	/* NULL casecmp: ldb_set_utf8_functions() skips NULL arguments */
	ldb_set_utf8_functions(ldb, context, casefold, NULL);
}
69 | | |
70 | | |
71 | | /* |
72 | | a simple case folding function |
73 | | NOTE: does not handle UTF8 |
74 | | */ |
75 | | char *ldb_casefold_default(void *context, TALLOC_CTX *mem_ctx, const char *s, size_t n) |
76 | 0 | { |
77 | 0 | size_t i; |
78 | 0 | char *ret = talloc_strndup(mem_ctx, s, n); |
79 | 0 | if (!s) { |
80 | 0 | errno = ENOMEM; |
81 | 0 | return NULL; |
82 | 0 | } |
83 | 0 | for (i=0;ret[i];i++) { |
84 | 0 | ret[i] = ldb_ascii_toupper(ret[i]); |
85 | 0 | } |
86 | 0 | return ret; |
87 | 0 | } |
88 | | |
89 | | |
90 | | /* |
91 | | * The default comparison fold function only knows ASCII. Multiple |
92 | | * spaces (0x20) are collapsed into one, and [a-z] map to [A-Z]. All |
93 | | * other bytes are compared without casefolding. |
94 | | * |
95 | | * Note that as well as not handling UTF-8, this function does not exactly |
96 | | * implement RFC 4518 (2.6.1. Insignificant Space Handling and Appendix B). |
97 | | */ |
98 | | |
99 | | int ldb_comparison_fold_ascii(void *ignored, |
100 | | const struct ldb_val *v1, |
101 | | const struct ldb_val *v2) |
102 | 0 | { |
103 | 0 | const uint8_t *s1 = v1->data; |
104 | 0 | const uint8_t *s2 = v2->data; |
105 | 0 | size_t n1 = v1->length, n2 = v2->length; |
106 | |
|
107 | 0 | while (n1 && *s1 == ' ') { s1++; n1--; }; |
108 | 0 | while (n2 && *s2 == ' ') { s2++; n2--; }; |
109 | |
|
110 | 0 | while (n1 && n2 && *s1 && *s2) { |
111 | 0 | if (ldb_ascii_toupper(*s1) != ldb_ascii_toupper(*s2)) { |
112 | 0 | break; |
113 | 0 | } |
114 | 0 | if (*s1 == ' ') { |
115 | 0 | while (n1 > 1 && s1[0] == s1[1]) { s1++; n1--; } |
116 | 0 | while (n2 > 1 && s2[0] == s2[1]) { s2++; n2--; } |
117 | 0 | } |
118 | 0 | s1++; s2++; |
119 | 0 | n1--; n2--; |
120 | 0 | } |
121 | | |
122 | | /* check for trailing spaces only if the other pointers has |
123 | | * reached the end of the strings otherwise we can |
124 | | * mistakenly match. ex. "domain users" <-> |
125 | | * "domainUpdates" |
126 | | */ |
127 | 0 | if (n1 && *s1 == ' ' && (!n2 || !*s2)) { |
128 | 0 | while (n1 && *s1 == ' ') { s1++; n1--; } |
129 | 0 | } |
130 | 0 | if (n2 && *s2 == ' ' && (!n1 || !*s1)) { |
131 | 0 | while (n2 && *s2 == ' ') { s2++; n2--; } |
132 | 0 | } |
133 | 0 | if (n1 == 0 && n2 != 0) { |
134 | 0 | return *s2 ? -1 : 0; |
135 | 0 | } |
136 | 0 | if (n2 == 0 && n1 != 0) { |
137 | 0 | return *s1 ? 1 : 0; |
138 | 0 | } |
139 | 0 | if (n1 == 0 && n2 == 0) { |
140 | 0 | return 0; |
141 | 0 | } |
142 | 0 | return NUMERIC_CMP(*s1, *s2); |
143 | 0 | } |
144 | | |
145 | | void ldb_set_utf8_default(struct ldb_context *ldb) |
146 | 0 | { |
147 | 0 | ldb_set_utf8_functions(ldb, NULL, |
148 | 0 | ldb_casefold_default, |
149 | 0 | ldb_comparison_fold_ascii); |
150 | 0 | } |
151 | | |
152 | | char *ldb_casefold(struct ldb_context *ldb, TALLOC_CTX *mem_ctx, const char *s, size_t n) |
153 | 0 | { |
154 | 0 | return ldb->utf8_fns.casefold(ldb->utf8_fns.context, mem_ctx, s, n); |
155 | 0 | } |
156 | | |
/*
  check the attribute name is valid according to rfc2251
  returns 1 if the name is ok, 0 otherwise

  A NULL or empty name is rejected. Apart from the lone "*" wildcard,
  a name must be 7-bit ASCII, start with a letter or '@', and continue
  with letters, digits or '-'.
*/

int ldb_valid_attr_name(const char *s)
{
	const char *p;

	if (s == NULL || s[0] == '\0') {
		return 0;
	}

	/* handle special ldb_tdb wildcard */
	if (strcmp(s, "*") == 0) {
		return 1;
	}

	/* first char must be an alpha (or our special '@' identifier) */
	if ((unsigned char)s[0] > 0x7f) {
		return 0;
	}
	if (s[0] != '@' && !isalpha((unsigned char)s[0])) {
		return 0;
	}

	/* the rest must be 7-bit alphanumerics or '-' */
	for (p = s + 1; *p != '\0'; p++) {
		unsigned char ch = (unsigned char)*p;

		if (ch > 0x7f) {
			return 0;
		}
		if (ch != '-' && !isalnum(ch)) {
			return 0;
		}
	}
	return 1;
}
188 | | |
189 | | char *ldb_attr_casefold(TALLOC_CTX *mem_ctx, const char *s) |
190 | 0 | { |
191 | 0 | size_t i; |
192 | 0 | char *ret = talloc_strdup(mem_ctx, s); |
193 | 0 | if (!ret) { |
194 | 0 | errno = ENOMEM; |
195 | 0 | return NULL; |
196 | 0 | } |
197 | 0 | for (i = 0; ret[i]; i++) { |
198 | 0 | ret[i] = ldb_ascii_toupper(ret[i]); |
199 | 0 | } |
200 | 0 | return ret; |
201 | 0 | } |
202 | | |
/*
  we accept either 'dn' or 'distinguishedName' for a distinguishedName

  returns 0 when attr matches one of those names under ldb_attr_cmp(),
  and -1 otherwise
*/
int ldb_attr_dn(const char *attr)
{
	if (ldb_attr_cmp(attr, "dn") == 0) {
		return 0;
	}
	if (ldb_attr_cmp(attr, "distinguishedName") == 0) {
		return 0;
	}
	return -1;
}
214 | | |
215 | 0 | _PRIVATE_ char ldb_ascii_toupper(char c) { |
216 | | /* |
217 | | * We are aiming for a 1970s C-locale toupper(), when all letters |
218 | | * were 7-bit and behaved with true American spirit. |
219 | | * |
220 | | * For example, we don't want the "i" in "<guid=" to be upper-cased to |
221 | | * "İ" as would happen in some locales, or we won't be able to parse |
222 | | * that properly. This is unfortunate for cases where we are dealing |
223 | | * with real text; a search for the name "Ali" would need to be |
224 | | * written "Alİ" to match. |
225 | | */ |
226 | 0 | return ('a' <= c && c <= 'z') ? c ^ 0x20 : c; |
227 | 0 | } |