/src/samba/lib/ldb/common/ldb_utf8.c

Source (jump to first uncovered line)
/*
   ldb database library

   Copyright (C) Andrew Tridgell  2004

     ** NOTE! The following LGPL license applies to the ldb
     ** library. This does NOT imply that all of Samba is released
     ** under the LGPL

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/

/*
 *  Name: ldb
 *
 *  Component: ldb utf8 handling
 *
 *  Description: case folding and case comparison for UTF8 strings
 *
 *  Author: Andrew Tridgell
 */

#include "ldb_private.h"
#include "system/locale.h"

/*
 * Install the callbacks used for case-folding and for comparing
 * case-insensitive ldb val strings.
 *
 * Each of context, casefold and casecmp is stored in ldb->utf8_fns
 * only when it is non-NULL; passing NULL for any of them leaves the
 * corresponding existing setting untouched.
 */
void ldb_set_utf8_functions(struct ldb_context *ldb,
          void *context,
          char *(*casefold)(void *, void *, const char *, size_t),
          int (*casecmp)(void *ctx,
             const struct ldb_val *v1,
             const struct ldb_val *v2))
{
  if (casecmp != NULL) {
    ldb->utf8_fns.casecmp = casecmp;
  }
  if (casefold != NULL) {
    ldb->utf8_fns.casefold = casefold;
  }
  if (context != NULL) {
    ldb->utf8_fns.context = context;
  }
}

/*
  this allows the user to pass in a case folding function (and its
  context) to handle utf8 strings.

  It forwards to ldb_set_utf8_functions() with a NULL casecmp, so only
  the context and the casefold callback are passed on; no comparison
  function is supplied through this entry point.
 */
void ldb_set_utf8_fns(struct ldb_context *ldb,
          void *context,
          char *(*casefold)(void *, void *, const char *, size_t))
{
  ldb_set_utf8_functions(ldb, context, casefold, NULL);
}


/*
  a simple case folding function
  NOTE: does not handle UTF8

  Duplicates at most n bytes of s onto mem_ctx and upper-cases the
  ASCII letters [a-z] of the copy in place using ldb_ascii_toupper().
  The context argument is not used.

  Returns the newly allocated string, or NULL with errno set to ENOMEM
  when the duplicate could not be created.
*/
char *ldb_casefold_default(void *context, TALLOC_CTX *mem_ctx, const char *s, size_t n)
{
  size_t i;
  char *ret = talloc_strndup(mem_ctx, s, n);
  /*
   * Check the result of the allocation, not the input pointer:
   * testing only 's' let a failed allocation fall through to the
   * loop below and dereference NULL. A NULL 's' is expected to make
   * talloc_strndup() return NULL as well, so that case still ends
   * up here.
   */
  if (!ret) {
    errno = ENOMEM;
    return NULL;
  }
  for (i=0;ret[i];i++) {
    ret[i] = ldb_ascii_toupper(ret[i]);
  }
  return ret;
}


/*
 * The default comparison fold function only knows ASCII. Multiple
 * spaces (0x20) are collapsed into one, and [a-z] map to [A-Z]. All
 * other bytes are compared without casefolding.
 *
 * Note that as well as not handling UTF-8, this function does not exactly
 * implement RFC 4518 (2.6.1. Insignificant Space Handling and Appendix B).
 *
 * Leading spaces are skipped on both values. Trailing spaces on one
 * value are skipped once the other value is exhausted. A value counts
 * as exhausted when its length runs out or a NUL byte is reached.
 *
 * The first argument is unused. v1->data and v2->data are only
 * dereferenced while the corresponding remaining length is non-zero.
 *
 * Returns 0 when the values are considered equal, -1 when v1 ends
 * before v2, 1 when v2 ends before v1, and otherwise the result of
 * NUMERIC_CMP() on the upper-cased bytes at the point where the two
 * values stopped matching.
 */

int ldb_comparison_fold_ascii(void *ignored,
            const struct ldb_val *v1,
            const struct ldb_val *v2)
{
  const uint8_t *s1 = v1->data;
  const uint8_t *s2 = v2->data;
  size_t n1 = v1->length, n2 = v2->length;
  uint8_t c1, c2;

  /* leading spaces are insignificant */
  while (n1 && *s1 == ' ') { s1++; n1--; }
  while (n2 && *s2 == ' ') { s2++; n2--; }

  while (n1 && n2 && *s1 && *s2) {
    if (ldb_ascii_toupper(*s1) != ldb_ascii_toupper(*s2)) {
      break;
    }
    if (*s1 == ' ') {
      /*
       * Both sides are at a space here (the folded bytes
       * matched). Step to the last space of each run so the
       * increment below consumes the whole run as one.
       */
      while (n1 > 1 && s1[0] == s1[1]) { s1++; n1--; }
      while (n2 > 1 && s2[0] == s2[1]) { s2++; n2--; }
    }
    s1++; s2++;
    n1--; n2--;
  }

  /* check for trailing spaces only if the other pointers has
   * reached the end of the strings otherwise we can
   * mistakenly match.  ex. "domain users" <->
   * "domainUpdates"
   */
  if (n1 && *s1 == ' ' && (!n2 || !*s2)) {
    while (n1 && *s1 == ' ') { s1++; n1--; }
  }
  if (n2 && *s2 == ' ' && (!n1 || !*s1)) {
    while (n2 && *s2 == ' ') { s2++; n2--; }
  }
  if (n1 == 0 && n2 != 0) {
    /* v1 ran out first; a NUL at this point in v2 counts as its end */
    return *s2 ? -1 : 0;
  }
  if (n2 == 0 && n1 != 0) {
    /* v2 ran out first; a NUL at this point in v1 counts as its end */
    return *s1 ? 1 : 0;
  }
  if (n1 == 0 && n2 == 0) {
    return 0;
  }
  /*
   * Order on the folded bytes, matching the test that ended the
   * loop. Ordering on the raw bytes made "a" sort after "B" while
   * "A" (which compares equal to "a") sorts before it. The casts
   * keep bytes >= 0x80 ordered as unsigned values regardless of the
   * signedness of plain char.
   */
  c1 = (uint8_t)ldb_ascii_toupper(*s1);
  c2 = (uint8_t)ldb_ascii_toupper(*s2);
  return NUMERIC_CMP(c1, c2);
}

/*
 * Install the built-in ASCII-only handlers: ldb_casefold_default() for
 * case folding and ldb_comparison_fold_ascii() for comparison.
 *
 * The context is passed as NULL, which ldb_set_utf8_functions() treats
 * as "leave the current context unchanged".
 */
void ldb_set_utf8_default(struct ldb_context *ldb)
{
  ldb_set_utf8_functions(ldb, NULL,
        ldb_casefold_default,
        ldb_comparison_fold_ascii);
}

/*
 * Case-fold up to n bytes of s using the casefold callback currently
 * registered in ldb->utf8_fns, passing it the registered context and
 * mem_ctx. Returns whatever the callback returns.
 *
 * The callback pointer is called without a NULL check.
 */
char *ldb_casefold(struct ldb_context *ldb, TALLOC_CTX *mem_ctx, const char *s, size_t n)
{
  return ldb->utf8_fns.casefold(ldb->utf8_fns.context, mem_ctx, s, n);
}

/*
  check the attribute name is valid according to rfc2251

  Accepted names are:
   - the single character "*" (special ldb_tdb wildcard), or
   - a 7-bit ASCII string whose first character is a letter or '@'
     and whose remaining characters are letters, digits or '-'.

  returns 1 if the name is ok, 0 otherwise (including for a NULL or
  empty string)
 */

int ldb_valid_attr_name(const char *s)
{
  const char *p;

  if (s == NULL || *s == '\0') {
    return 0;
  }

  /* handle special ldb_tdb wildcard */
  if (s[0] == '*' && s[1] == '\0') {
    return 1;
  }

  /*
   * first char must be an alpha (or our special '@' identifier).
   * The mask test rejects anything outside 7-bit ASCII, i.e. it is
   * the isascii() check.
   */
  if ((*s & ~0x7f) != 0) {
    return 0;
  }
  if (!isalpha((unsigned char)*s) && *s != '@') {
    return 0;
  }

  /* every following char must be ASCII alphanumeric or '-' */
  for (p = s + 1; *p != '\0'; p++) {
    if ((*p & ~0x7f) != 0) {
      return 0;
    }
    if (!isalnum((unsigned char)*p) && *p != '-') {
      return 0;
    }
  }
  return 1;
}

/*
 * Return a copy of s, allocated on mem_ctx, with the ASCII letters
 * [a-z] upper-cased via ldb_ascii_toupper().
 *
 * Returns NULL with errno set to ENOMEM when talloc_strdup() returns
 * NULL.
 */
char *ldb_attr_casefold(TALLOC_CTX *mem_ctx, const char *s)
{
  char *folded = talloc_strdup(mem_ctx, s);
  char *p;

  if (folded == NULL) {
    errno = ENOMEM;
    return NULL;
  }

  for (p = folded; *p != '\0'; p++) {
    *p = ldb_ascii_toupper(*p);
  }
  return folded;
}

/*
  we accept either 'dn' or 'distinguishedName' for a distinguishedName

  returns 0 when attr matches one of those two names according to
  ldb_attr_cmp(), and -1 otherwise
*/
int ldb_attr_dn(const char *attr)
{
  if (ldb_attr_cmp(attr, "dn") == 0) {
    return 0;
  }
  if (ldb_attr_cmp(attr, "distinguishedName") == 0) {
    return 0;
  }
  return -1;
}

_PRIVATE_ char ldb_ascii_toupper(char c) {
  /*
   * Locale-independent upper-casing: only the 7-bit letters 'a'..'z'
   * are changed, exactly as a C-locale toupper() would do. Every
   * other value is returned as it came in.
   *
   * This is deliberate. In some locales the "i" in "<guid=" would be
   * upper-cased to "İ", and we would no longer be able to parse that
   * properly. The cost is paid when dealing with real text: a search
   * for the name "Ali" would need to be written "Alİ" to match.
   */
  if (c >= 'a' && c <= 'z') {
    /* flipping bit 0x20 turns a lower-case letter into upper case */
    return c ^ 0x20;
  }
  return c;
}

Coverage Report

Created: 2025-07-23 07:04

Line	Count	Source (jump to first uncovered line)
1		/*
2		ldb database library
3
4		Copyright (C) Andrew Tridgell 2004
5
6		** NOTE! The following LGPL license applies to the ldb
7		** library. This does NOT imply that all of Samba is released
8		** under the LGPL
9
10		This library is free software; you can redistribute it and/or
11		modify it under the terms of the GNU Lesser General Public
12		License as published by the Free Software Foundation; either
13		version 3 of the License, or (at your option) any later version.
14
15		This library is distributed in the hope that it will be useful,
16		but WITHOUT ANY WARRANTY; without even the implied warranty of
17		MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18		Lesser General Public License for more details.
19
20		You should have received a copy of the GNU Lesser General Public
21		License along with this library; if not, see <http://www.gnu.org/licenses/>.
22		*/
23
24		/*
25		* Name: ldb
26		*
27		* Component: ldb utf8 handling
28		*
29		* Description: case folding and case comparison for UTF8 strings
30		*
31		* Author: Andrew Tridgell
32		*/
33
34		#include "ldb_private.h"
35		#include "system/locale.h"
36
37		/*
38		* Set functions for comparing and case-folding case-insensitive ldb val
39		* strings.
40		*/
41		void ldb_set_utf8_functions(struct ldb_context *ldb,
42		void *context,
43		char (casefold)(void , void , const char *, size_t),
44		int (casecmp)(void ctx,
45		const struct ldb_val *v1,
46		const struct ldb_val *v2))
47	0	{
48	0	if (context) {
49	0	ldb->utf8_fns.context = context;
50	0	}
51	0	if (casefold) {
52	0	ldb->utf8_fns.casefold = casefold;
53	0	}
54	0	if (casecmp) {
55	0	ldb->utf8_fns.casecmp = casecmp;
56	0	}
57	0	}
58
59		/*
60		this allow the user to pass in a caseless comparison
61		function to handle utf8 caseless comparisons
62		*/
63		void ldb_set_utf8_fns(struct ldb_context *ldb,
64		void *context,
65		char (casefold)(void , void , const char *, size_t))
66	0	{
67	0	ldb_set_utf8_functions(ldb, context, casefold, NULL);
68	0	}
69
70
71		/*
72		a simple case folding function
73		NOTE: does not handle UTF8
74		*/
75		char ldb_casefold_default(void context, TALLOC_CTX mem_ctx, const char s, size_t n)
76	0	{
77	0	size_t i;
78	0	char *ret = talloc_strndup(mem_ctx, s, n);
79	0	if (!s) {
80	0	errno = ENOMEM;
81	0	return NULL;
82	0	}
83	0	for (i=0;ret[i];i++) {
84	0	ret[i] = ldb_ascii_toupper(ret[i]);
85	0	}
86	0	return ret;
87	0	}
88
89
90		/*
91		* The default comparison fold function only knows ASCII. Multiple
92		* spaces (0x20) are collapsed into one, and [a-z] map to [A-Z]. All
93		* other bytes are compared without casefolding.
94		*
95		* Note that as well as not handling UTF-8, this function does not exactly
96		* implement RFC 4518 (2.6.1. Insignificant Space Handling and Appendix B).
97		*/
98
99		int ldb_comparison_fold_ascii(void *ignored,
100		const struct ldb_val *v1,
101		const struct ldb_val *v2)
102	0	{
103	0	const uint8_t *s1 = v1->data;
104	0	const uint8_t *s2 = v2->data;
105	0	size_t n1 = v1->length, n2 = v2->length;
106
107	0	while (n1 && *s1 == ' ') { s1++; n1--; };
108	0	while (n2 && *s2 == ' ') { s2++; n2--; };
109
110	0	while (n1 && n2 && s1 && s2) {
111	0	if (ldb_ascii_toupper(s1) != ldb_ascii_toupper(s2)) {
112	0	break;
113	0	}
114	0	if (*s1 == ' ') {
115	0	while (n1 > 1 && s1[0] == s1[1]) { s1++; n1--; }
116	0	while (n2 > 1 && s2[0] == s2[1]) { s2++; n2--; }
117	0	}
118	0	s1++; s2++;
119	0	n1--; n2--;
120	0	}
121
122		/* check for trailing spaces only if the other pointers has
123		* reached the end of the strings otherwise we can
124		* mistakenly match. ex. "domain users" <->
125		* "domainUpdates"
126		*/
127	0	if (n1 && s1 == ' ' && (!n2 \|\| !s2)) {
128	0	while (n1 && *s1 == ' ') { s1++; n1--; }
129	0	}
130	0	if (n2 && s2 == ' ' && (!n1 \|\| !s1)) {
131	0	while (n2 && *s2 == ' ') { s2++; n2--; }
132	0	}
133	0	if (n1 == 0 && n2 != 0) {
134	0	return *s2 ? -1 : 0;
135	0	}
136	0	if (n2 == 0 && n1 != 0) {
137	0	return *s1 ? 1 : 0;
138	0	}
139	0	if (n1 == 0 && n2 == 0) {
140	0	return 0;
141	0	}
142	0	return NUMERIC_CMP(s1, s2);
143	0	}
144
145		void ldb_set_utf8_default(struct ldb_context *ldb)
146	0	{
147	0	ldb_set_utf8_functions(ldb, NULL,
148	0	ldb_casefold_default,
149	0	ldb_comparison_fold_ascii);
150	0	}
151
152		char ldb_casefold(struct ldb_context ldb, TALLOC_CTX mem_ctx, const char s, size_t n)
153	0	{
154	0	return ldb->utf8_fns.casefold(ldb->utf8_fns.context, mem_ctx, s, n);
155	0	}
156
157		/*
158		check the attribute name is valid according to rfc2251
159		returns 1 if the name is ok
160		*/
161
162		int ldb_valid_attr_name(const char *s)
163	0	{
164	0	size_t i;
165
166	0	if (!s \|\| !s[0])
167	0	return 0;
168
169		/* handle special ldb_tdb wildcard */
170	0	if (strcmp(s, "*") == 0) return 1;
171
172	0	for (i = 0; s[i]; i++) {
173	0	if (! isascii(s[i])) {
174	0	return 0;
175	0	}
176	0	if (i == 0) { /* first char must be an alpha (or our special '@' identifier) */
177	0	if (! (isalpha(s[i]) \|\| (s[i] == '@'))) {
178	0	return 0;
179	0	}
180	0	} else {
181	0	if (! (isalnum(s[i]) \|\| (s[i] == '-'))) {
182	0	return 0;
183	0	}
184	0	}
185	0	}
186	0	return 1;
187	0	}
188
189		char ldb_attr_casefold(TALLOC_CTX mem_ctx, const char *s)
190	0	{
191	0	size_t i;
192	0	char *ret = talloc_strdup(mem_ctx, s);
193	0	if (!ret) {
194	0	errno = ENOMEM;
195	0	return NULL;
196	0	}
197	0	for (i = 0; ret[i]; i++) {
198	0	ret[i] = ldb_ascii_toupper(ret[i]);
199	0	}
200	0	return ret;
201	0	}
202
203		/*
204		we accept either 'dn' or 'distinguishedName' for a distinguishedName
205		*/
206		int ldb_attr_dn(const char *attr)
207	0	{
208	0	if (ldb_attr_cmp(attr, "dn") == 0 \|\|
209	0	ldb_attr_cmp(attr, "distinguishedName") == 0) {
210	0	return 0;
211	0	}
212	0	return -1;
213	0	}
214
215	0	_PRIVATE_ char ldb_ascii_toupper(char c) {
216		/*
217		* We are aiming for a 1970s C-locale toupper(), when all letters
218		* were 7-bit and behaved with true American spirit.
219		*
220		* For example, we don't want the "i" in "<guid=" to be upper-cased to
221		* "İ" as would happen in some locales, or we won't be able to parse
222		* that properly. This is unfortunate for cases where we are dealing
223		* with real text; a search for the name "Ali" would need to be
224		* written "Alİ" to match.
225		*/
226	0	return ('a' <= c && c <= 'z') ? c ^ 0x20 : c;
227	0	}