Coverage Report

Created: 2025-12-31 06:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/samba/lib/util/charset/util_unistr.c
Line
Count
Source
1
/*
2
   Unix SMB/CIFS implementation.
3
   Samba utility functions
4
   Copyright (C) Andrew Tridgell 1992-2001
5
   Copyright (C) Simo Sorce 2001
6
7
   This program is free software; you can redistribute it and/or modify
8
   it under the terms of the GNU General Public License as published by
9
   the Free Software Foundation; either version 3 of the License, or
10
   (at your option) any later version.
11
12
   This program is distributed in the hope that it will be useful,
13
   but WITHOUT ANY WARRANTY; without even the implied warranty of
14
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
   GNU General Public License for more details.
16
17
   You should have received a copy of the GNU General Public License
18
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
19
*/
20
21
#include "replace.h"
22
#include "system/locale.h"
23
#include "charset.h"
24
#include "lib/util/byteorder.h"
25
#include "lib/util/fault.h"
26
#include "lib/util/tsort.h"
27
28
/**
29
 String replace.
30
 NOTE: oldc and newc must be 7 bit characters
31
**/
32
_PUBLIC_ void string_replace_m(char *s, char oldc, char newc)
33
0
{
34
0
  struct smb_iconv_handle *ic = get_iconv_handle();
35
0
  while (s && *s) {
36
0
    size_t size;
37
0
    codepoint_t c = next_codepoint_handle(ic, s, &size);
38
0
    if (c == oldc) {
39
0
      *s = newc;
40
0
    }
41
0
    s += size;
42
0
  }
43
0
}
44
45
/**
46
 Convert a string to lower case, allocated with talloc
47
**/
48
_PUBLIC_ char *strlower_talloc_handle(struct smb_iconv_handle *iconv_handle,
49
              TALLOC_CTX *ctx, const char *src)
50
0
{
51
0
  size_t size=0;
52
0
  char *dest;
53
54
0
  if(src == NULL) {
55
0
    return NULL;
56
0
  }
57
58
  /* this takes advantage of the fact that upper/lower can't
59
     change the length of a character by more than 1 byte */
60
0
  dest = talloc_array(ctx, char, 2*(strlen(src))+1);
61
0
  if (dest == NULL) {
62
0
    return NULL;
63
0
  }
64
65
0
  while (*src) {
66
0
    size_t c_size;
67
0
    codepoint_t c = next_codepoint_handle(iconv_handle, src, &c_size);
68
0
    src += c_size;
69
70
0
    c = tolower_m(c);
71
72
0
    c_size = push_codepoint_handle(iconv_handle, dest+size, c);
73
0
    if (c_size == -1) {
74
0
      talloc_free(dest);
75
0
      return NULL;
76
0
    }
77
0
    size += c_size;
78
0
  }
79
80
0
  dest[size] = 0;
81
82
  /* trim it so talloc_append_string() works */
83
0
  dest = talloc_realloc(ctx, dest, char, size+1);
84
85
0
  talloc_set_name_const(dest, dest);
86
87
0
  return dest;
88
0
}
89
90
/**
 Convert a string to lower case, allocated with talloc, using the
 handle returned by get_iconv_handle().
**/
_PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
{
	return strlower_talloc_handle(get_iconv_handle(), ctx, src);
}
95
96
/**
97
 Convert a string to UPPER case, allocated with talloc
98
 source length limited to n bytes, iconv handle supplied
99
**/
100
_PUBLIC_ char *strupper_talloc_n_handle(struct smb_iconv_handle *iconv_handle,
101
          TALLOC_CTX *ctx, const char *src, size_t n)
102
298
{
103
298
  size_t size=0;
104
298
  char *dest;
105
106
298
  if (!src) {
107
0
    return NULL;
108
0
  }
109
110
  /* this takes advantage of the fact that upper/lower can't
111
     change the length of a character by more than 1 byte */
112
298
  dest = talloc_array(ctx, char, 2*(n+1));
113
298
  if (dest == NULL) {
114
0
    return NULL;
115
0
  }
116
117
85.3k
  while (n && *src) {
118
85.1k
    size_t c_size;
119
85.1k
    codepoint_t c = next_codepoint_handle_ext(iconv_handle, src, n,
120
85.1k
                CH_UNIX, &c_size);
121
85.1k
    src += c_size;
122
85.1k
    n -= c_size;
123
124
85.1k
    c = toupper_m(c);
125
126
85.1k
    c_size = push_codepoint_handle(iconv_handle, dest+size, c);
127
85.1k
    if (c_size == -1) {
128
53
      talloc_free(dest);
129
53
      return NULL;
130
53
    }
131
85.0k
    size += c_size;
132
85.0k
  }
133
134
245
  dest[size] = 0;
135
136
  /* trim it so talloc_append_string() works */
137
245
  dest = talloc_realloc(ctx, dest, char, size+1);
138
139
245
  talloc_set_name_const(dest, dest);
140
141
245
  return dest;
142
298
}
143
144
/**
145
 Convert a string to UPPER case, allocated with talloc
146
 source length limited to n bytes
147
**/
148
_PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
149
298
{
150
298
  struct smb_iconv_handle *iconv_handle = get_iconv_handle();
151
298
  return strupper_talloc_n_handle(iconv_handle, ctx, src, n);
152
298
}
153
/**
154
 Convert a string to UPPER case, allocated with talloc
155
**/
156
_PUBLIC_ char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
157
298
{
158
298
  return strupper_talloc_n(ctx, src, src?strlen(src):0);
159
298
}
160
161
/**
162
 talloc_strdup() a unix string to upper case.
163
**/
164
_PUBLIC_ char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src)
165
0
{
166
0
  return strupper_talloc(ctx, src);
167
0
}
168
169
170
/*
171
 * strncasecmp_ldb() works a *bit* like strncasecmp, with various
172
 * tricks to suit the way LDB compares strings. The differences are:
173
 *
174
 * 0. each string has its own length.
175
 *
176
 * 1. consecutive spaces are collapsed down to one space, so that
177
 *    "a  b" equals "a b". (this is why each string needs its own
178
 *    length). Leading and trailing spaces are removed altogether.
179
 *
180
 * 2. Comparisons are done in UPPER CASE, as Windows does, not in
181
 *    lowercase as POSIX would have it.
182
 *
183
 * 3. An invalid byte compares higher than any real character. For example,
184
 *    "hello\xc2\xff" would sort higher than "hello\xcd\xb6", because CD
185
 *    B6 is a valid sequence and C2 FF is not.
186
 *
187
 * 4. If two strings become invalid on the same character, the rest
188
 *    of the string is compared via ldb ASCII case fold rules.
189
 *
190
 *    For example, "hellō\xC2\xFFworld" < " hElLŌ\xFE ", because the
191
 *    strings are equal up to 'ō' by utf-8 casefold, but the "\xc2\xff" and
192
 *    "\xfe" are invalid sequences. At that point, we skip to the byte-by-byte
193
 *    (but space-eating, casefolding) comparison, and 0xc2 < 0xfe.
194
 */
195
196
#define EAT_SPACE(s, len, ends_in_space)       \
197
9.90k
  do {              \
198
15.6k
    while (len) {          \
199
11.7k
      if (*s != ' ') {      \
200
6.00k
        break;         \
201
6.00k
      }           \
202
11.7k
      s++;           \
203
5.74k
      len--;           \
204
5.74k
    }             \
205
9.90k
    ends_in_space = (len == 0 || *s == '\0');   \
206
9.90k
  } while(0)
207
208
209
_PUBLIC_ int strncasecmp_ldb(const char *s1,
210
           size_t len1,
211
           const char *s2,
212
           size_t len2)
213
4.26k
{
214
4.26k
  struct smb_iconv_handle *iconv_handle = get_iconv_handle();
215
4.26k
  codepoint_t c1, c2;
216
4.26k
  size_t cs1, cs2;
217
4.26k
  bool ends_in_space1, ends_in_space2;
218
4.26k
  int ret;
219
4.26k
  bool end1, end2;
220
221
4.26k
  EAT_SPACE(s1, len1, ends_in_space1);
222
4.26k
  EAT_SPACE(s2, len2, ends_in_space2);
223
  /*
224
   * if ends_in_space was set, the string was empty or only
225
   * spaces (which we treat as equivalent).
226
   */
227
4.26k
  if (ends_in_space1 && ends_in_space2) {
228
1.18k
    return 0;
229
1.18k
  }
230
3.07k
  if (ends_in_space1) {
231
720
    return -1;
232
720
  }
233
2.35k
  if (ends_in_space2) {
234
720
    return 1;
235
720
  }
236
237
4.95k
  while (true) {
238
    /*
239
     * If the next byte is a space, we eat all the spaces,
240
     * and say we found a single codepoint. If the spaces
241
     * were at the end of the string, the codepoint is 0,
242
     * as if there were no spaces. Otherwise it is 0x20,
243
     * as if there was one space.
244
     *
245
     * Setting the codepoint to 0 will break the loop, but
246
     * only after codepoints have been found in both strings.
247
     */
248
4.95k
    if (len1 == 0 || *s1 == 0) {
249
528
      c1 = 0;
250
4.43k
    } else if (*s1 == ' ') {
251
249
      EAT_SPACE(s1, len1, ends_in_space1);
252
249
      c1 = ends_in_space1 ? 0 : ' ';
253
4.18k
    } else if ((*s1 & 0x80) == 0) {
254
2.17k
      c1 = *s1;
255
2.17k
      s1++;
256
2.17k
      len1--;
257
2.17k
    } else {
258
2.01k
      c1 = next_codepoint_handle_ext(iconv_handle, s1, len1,
259
2.01k
                   CH_UNIX, &cs1);
260
2.01k
      if (c1 != INVALID_CODEPOINT) {
261
1.22k
        s1 += cs1;
262
1.22k
        len1 -= cs1;
263
1.22k
      }
264
2.01k
    }
265
266
4.95k
    if (len2 == 0 || *s2 == 0) {
267
528
      c2 = 0;
268
4.43k
    } else if (*s2 == ' ') {
269
249
      EAT_SPACE(s2, len2, ends_in_space2);
270
249
      c2 = ends_in_space2 ? 0 : ' ';
271
4.18k
    } else if ((*s2 & 0x80) == 0) {
272
2.17k
      c2 = *s2;
273
2.17k
      s2++;
274
2.17k
      len2--;
275
2.17k
    } else {
276
2.01k
      c2 = next_codepoint_handle_ext(iconv_handle, s2, len2,
277
2.01k
                   CH_UNIX, &cs2);
278
2.01k
      if (c2 != INVALID_CODEPOINT) {
279
1.22k
        s2 += cs2;
280
1.22k
        len2 -= cs2;
281
1.22k
      }
282
2.01k
    }
283
284
4.95k
    if (c1 == 0 || c2 == 0 ||
285
4.35k
        c1 == INVALID_CODEPOINT || c2 == INVALID_CODEPOINT) {
286
1.43k
      break;
287
1.43k
    }
288
289
3.52k
    if (c1 == c2) {
290
2.98k
      continue;
291
2.98k
    }
292
544
    c1 = toupper_m(c1);
293
544
    c2 = toupper_m(c2);
294
544
    if (c1 != c2) {
295
206
      break;
296
206
    }
297
544
  }
298
299
  /*
300
   * Either a difference has been found, or one or both strings have
301
   * ended or hit invalid codepoints.
302
   */
303
1.63k
  ret = NUMERIC_CMP(c1, c2);
304
305
1.63k
  if (ret != 0) {
306
394
    return ret;
307
394
  }
308
  /*
309
   * the strings are equal up to here, but one might be longer.
310
   */
311
1.24k
  end1 = len1 == 0 || *s1 == 0;
312
1.24k
  end2 = len2 == 0 || *s2 == 0;
313
314
1.24k
  if (end1 && end2) {
315
520
    return 0;
316
520
  }
317
724
  if (end1) {
318
0
    return -1;
319
0
  }
320
724
  if (end2) {
321
0
    return -1;
322
0
  }
323
324
  /*
325
   * By elimination, if we got here, we have INVALID_CODEPOINT on both
326
   * sides.
327
   *
328
   * THere is no perfect option, but what we choose to do is continue on
329
   * with ascii case fold (as if calling ldb_comparison_fold_ascii()
330
   * which is private to ldb, so we can't just defer to it).
331
   */
332
22.9k
  while (true) {
333
22.9k
    if (len1 == 0 || *s1 == 0) {
334
526
      c1 = 0;
335
22.4k
    } else if (*s1 == ' ') {
336
435
      EAT_SPACE(s1, len1, ends_in_space1);
337
435
      c1 = ends_in_space1 ? 0 : ' ';
338
22.0k
    } else {
339
22.0k
      c1 = *s1;
340
22.0k
      s1++;
341
22.0k
      len1--;
342
22.0k
      c1 = ('a' <= c1 && c1 <= 'z') ? c1 ^ 0x20 : c1;
343
22.0k
    }
344
345
22.9k
    if (len2 == 0 || *s2 == 0) {
346
526
      c2 = 0;
347
22.4k
    } else if (*s2 == ' ') {
348
435
      EAT_SPACE(s2, len2, ends_in_space2);
349
435
      c2 = ends_in_space2 ? 0 : ' ';
350
22.0k
    } else {
351
22.0k
      c2 = *s2;
352
22.0k
      s2++;
353
22.0k
      len2--;
354
22.0k
      c2 = ('a' <= c2 && c2 <= 'z') ? c2 ^ 0x20 : c2;
355
22.0k
    }
356
357
22.9k
    if (c1 == 0 || c2 == 0 || c1 != c2) {
358
724
      break;
359
724
    }
360
22.9k
  }
361
724
  return NUMERIC_CMP(c1, c2);
362
724
}
363
364
#undef EAT_SPACE
365
366
367
/**
368
 Find the number of 'c' chars in a string
369
**/
370
_PUBLIC_ size_t count_chars_m(const char *s, char c)
371
0
{
372
0
  struct smb_iconv_handle *ic = get_iconv_handle();
373
0
  size_t count = 0;
374
375
0
  while (*s) {
376
0
    size_t size;
377
0
    codepoint_t c2 = next_codepoint_handle(ic, s, &size);
378
0
    if (c2 == c) count++;
379
0
    s += size;
380
0
  }
381
382
0
  return count;
383
0
}
384
385
size_t ucs2_align(const void *base_ptr, const void *p, int flags)
386
0
{
387
0
  if (flags & (STR_NOALIGN|STR_ASCII)) {
388
0
    return 0;
389
0
  }
390
0
  return PTR_DIFF(p, base_ptr) & 1;
391
0
}
392
393
/**
return the number of bytes occupied by a buffer in CH_UTF16 format

The count stops at, and does not include, the first all-zero 16 bit
unit. buf must contain such a terminator.
**/
size_t utf16_len(const void *buf)
{
	const uint8_t *bytes = (const uint8_t *)buf;
	size_t len = 0;

	/* a 16 bit unit is zero exactly when both of its bytes are zero */
	while (bytes[len] != 0 || bytes[len + 1] != 0) {
		len += 2;
	}

	return len;
}
404
405
/**
return the number of bytes occupied by a buffer in CH_UTF16 format
the result includes the null termination
**/
size_t utf16_null_terminated_len(const void *buf)
{
	const size_t text_bytes = utf16_len(buf);

	/* plus the two bytes of the terminating unit */
	return text_bytes + 2;
}
413
414
/**
return the number of bytes occupied by a buffer in CH_UTF16 format
limited by 'n' bytes

Only whole 16 bit units that fit inside the first n bytes are looked
at; the count stops before the first all-zero unit.
**/
size_t utf16_len_n(const void *src, size_t n)
{
	const uint8_t *bytes = (const uint8_t *)src;
	size_t len = 0;

	while (len + 2 <= n) {
		if (bytes[len] == 0 && bytes[len + 1] == 0) {
			break;
		}
		len += 2;
	}

	return len;
}
426
427
/**
return the number of bytes occupied by a buffer in CH_UTF16 format
the result includes the null termination
limited by 'n' bytes

The two terminator bytes are only counted when they fit inside n.
**/
size_t utf16_null_terminated_len_n(const void *src, size_t n)
{
	const size_t len = utf16_len_n(src, n);

	return (len + 2 <= n) ? len + 2 : len;
}
444
445
/**
 Duplicate len bytes of str into a new talloc buffer under mem_ctx and
 append a two byte UTF-16 null terminator.

 @return the new buffer (len + 2 bytes), or NULL if len + 2 would
         overflow or the allocation fails.
**/
unsigned char *talloc_utf16_strlendup(TALLOC_CTX *mem_ctx, const char *str, size_t len)
{
	unsigned char *copy = NULL;

	/* len + 2 must be representable */
	if (len > SIZE_MAX - 2) {
		return NULL;
	}

	/* room for the payload plus the UTF-16 null terminator */
	copy = talloc_size(mem_ctx, len + 2);
	if (copy == NULL) {
		return NULL;
	}

	memcpy(copy, str, len);

	/* always leave the result null-terminated as UTF-16 */
	copy[len] = '\0';
	copy[len + 1] = '\0';

	return copy;
}
474
475
/**
 Duplicate a null-terminated UTF-16 string under mem_ctx.
 A NULL str yields NULL.
**/
unsigned char *talloc_utf16_strdup(TALLOC_CTX *mem_ctx, const char *str)
{
	size_t len;

	if (str == NULL) {
		return NULL;
	}

	len = utf16_len(str);
	return talloc_utf16_strlendup(mem_ctx, str, len);
}
482
483
/**
 Duplicate a UTF-16 string under mem_ctx, looking at no more than n
 bytes of str. A NULL str yields NULL.
**/
unsigned char *talloc_utf16_strndup(TALLOC_CTX *mem_ctx, const char *str, size_t n)
{
	size_t len;

	if (str == NULL) {
		return NULL;
	}

	len = utf16_len_n(str, n);
	return talloc_utf16_strlendup(mem_ctx, str, len);
}
490
491
/**
 * Determine the length and validity of a utf-8 string.
 *
 * Scanning stops at maxlen bytes, at a '\0' byte, or at the first
 * malformed sequence (bad continuation byte, truncated sequence,
 * overlong encoding, encoded surrogate, or value above U+10FFFF).
 * The three output parameters are always written.
 *
 * @param input the string pointer
 * @param maxlen maximum size of the string
 * @param byte_len receives the length in bytes of the valid section
 * @param char_len receives the number of unicode characters in the valid section
 * @param utf16_len receives the number of 16 bit units the valid
 *        section would need in UTF16 encoding (characters that need
 *        four utf-8 bytes count twice).
 *
 * @return true if the input is valid up to maxlen, or a '\0' byte, otherwise false.
 */
bool utf8_check(const char *input, size_t maxlen,
		size_t *byte_len,
		size_t *char_len,
		size_t *utf16_len)
{
	const uint8_t *bytes = (const uint8_t *)input;
	size_t pos = 0;
	size_t n_chars = 0;
	size_t n_pairs = 0;
	bool valid = true;

	while (pos < maxlen && bytes[pos] != 0) {
		const uint8_t lead = bytes[pos];
		const size_t avail = maxlen - pos;
		uint32_t cp;
		size_t seq_len;

		if (lead < 0x80) {
			/* 0xxxxxxx */
			seq_len = 1;
		} else if ((lead & 0xe0) == 0xc0) {
			/* 110xxxxx 10xxxxxx */
			if (avail < 2 ||
			    (bytes[pos + 1] & 0xc0) != 0x80) {
				valid = false;
				break;
			}
			cp = (uint32_t)(lead & 0x1f) << 6 |
			     (uint32_t)(bytes[pos + 1] & 0x3f);
			if (cp < 0x80) {
				/* overlong */
				valid = false;
				break;
			}
			seq_len = 2;
		} else if ((lead & 0xf0) == 0xe0) {
			/* 1110xxxx 10xxxxxx 10xxxxxx */
			if (avail < 3 ||
			    (bytes[pos + 1] & 0xc0) != 0x80 ||
			    (bytes[pos + 2] & 0xc0) != 0x80) {
				valid = false;
				break;
			}
			cp = (uint32_t)(lead & 0x0f) << 12 |
			     (uint32_t)(bytes[pos + 1] & 0x3f) << 6 |
			     (uint32_t)(bytes[pos + 2] & 0x3f);
			if (cp < 0x800) {
				/* overlong */
				valid = false;
				break;
			}
			if (cp >= 0xd800 && cp <= 0xdfff) {
				/*
				 * RFC3629 forbids encoding half of a
				 * UTF-16 surrogate pair; the character
				 * should have been a four byte utf-8
				 * sequence.
				 */
				valid = false;
				break;
			}
			seq_len = 3;
		} else if ((lead & 0xf8) == 0xf0) {
			/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
			if (avail < 4 ||
			    (bytes[pos + 1] & 0xc0) != 0x80 ||
			    (bytes[pos + 2] & 0xc0) != 0x80 ||
			    (bytes[pos + 3] & 0xc0) != 0x80) {
				valid = false;
				break;
			}
			cp = (uint32_t)(lead & 0x07) << 18 |
			     (uint32_t)(bytes[pos + 1] & 0x3f) << 12 |
			     (uint32_t)(bytes[pos + 2] & 0x3f) << 6 |
			     (uint32_t)(bytes[pos + 3] & 0x3f);
			if (cp < 0x10000 || cp > 0x10ffff) {
				valid = false;
				break;
			}
			/* this one will need two UTF16 characters */
			n_pairs++;
			seq_len = 4;
		} else {
			/* a stray continuation byte or 0xf8..0xff */
			valid = false;
			break;
		}

		pos += seq_len;
		n_chars++;
	}

	*byte_len = pos;
	*char_len = n_chars;
	*utf16_len = n_chars + n_pairs;
	return valid;
}
609
610
611
/**
612
 * Copy a string from a char* unix src to a dos codepage string destination.
613
 *
614
 * @converted_size the number of bytes occupied by the string in the destination.
615
 * @return bool true if success.
616
 *
617
 * @param flags can include
618
 * <dl>
619
 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
620
 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
621
 * </dl>
622
 *
623
 * @param dest_len the maximum length in bytes allowed in the
624
 * destination.  If @p dest_len is -1 then no maximum is used.
625
 **/
626
static bool push_ascii_string(void *dest, const char *src, size_t dest_len, int flags, size_t *converted_size)
627
0
{
628
0
  size_t src_len;
629
0
  bool ret;
630
631
0
  if (flags & STR_UPPER) {
632
0
    char *tmpbuf = strupper_talloc(NULL, src);
633
0
    if (tmpbuf == NULL) {
634
0
      return false;
635
0
    }
636
0
    ret = push_ascii_string(dest, tmpbuf, dest_len, flags & ~STR_UPPER, converted_size);
637
0
    talloc_free(tmpbuf);
638
0
    return ret;
639
0
  }
640
641
0
  src_len = strlen(src);
642
643
0
  if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
644
0
    src_len++;
645
646
0
  return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, converted_size);
647
0
}
648
649
/**
650
 * Copy a string from a dos codepage source to a unix char* destination.
651
 *
652
 * The resulting string in "dest" is always null terminated.
653
 *
654
 * @param flags can have:
655
 * <dl>
656
 * <dt>STR_TERMINATE</dt>
657
 * <dd>STR_TERMINATE means the string in @p src
658
 * is null terminated, and src_len is ignored.</dd>
659
 * </dl>
660
 *
661
 * @param src_len is the length of the source area in bytes.
662
 * @returns the number of bytes occupied by the string in @p src.
663
 **/
664
static ssize_t pull_ascii_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
665
0
{
666
0
  size_t size = 0;
667
668
0
  if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
669
0
    if (src_len == (size_t)-1) {
670
0
      src_len = strlen((const char *)src) + 1;
671
0
    } else {
672
0
      size_t len = strnlen((const char *)src, src_len);
673
0
      if (len < src_len)
674
0
        len++;
675
0
      src_len = len;
676
0
    }
677
0
  }
678
679
  /* We're ignoring the return here.. */
680
0
  (void)convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, &size);
681
682
0
  if (dest_len)
683
0
    dest[MIN(size, dest_len-1)] = 0;
684
685
0
  return src_len;
686
0
}
687
688
/**
689
 * Copy a string from a char* src to a unicode destination.
690
 *
691
 * @returns the number of bytes occupied by the string in the destination.
692
 *
693
 * @param flags can have:
694
 *
695
 * <dl>
696
 * <dt>STR_TERMINATE <dd>means include the null termination.
697
 * <dt>STR_UPPER     <dd>means uppercase in the destination.
698
 * <dt>STR_NOALIGN   <dd>means don't do alignment.
699
 * </dl>
700
 *
701
 * @param dest_len is the maximum length allowed in the
702
 * destination. If dest_len is -1 then no maximum is used.
703
 **/
704
static ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags)
705
0
{
706
0
  size_t len=0;
707
0
  size_t src_len = strlen(src);
708
0
  size_t size = 0;
709
0
  bool ret;
710
711
0
  if (flags & STR_UPPER) {
712
0
    char *tmpbuf = strupper_talloc(NULL, src);
713
0
    ssize_t retval;
714
0
    if (tmpbuf == NULL) {
715
0
      return -1;
716
0
    }
717
0
    retval = push_ucs2(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
718
0
    talloc_free(tmpbuf);
719
0
    return retval;
720
0
  }
721
722
0
  if (flags & STR_TERMINATE)
723
0
    src_len++;
724
725
0
  if (ucs2_align(NULL, dest, flags)) {
726
0
    *(char *)dest = 0;
727
0
    dest = (void *)((char *)dest + 1);
728
0
    if (dest_len) dest_len--;
729
0
    len++;
730
0
  }
731
732
  /* ucs2 is always a multiple of 2 bytes */
733
0
  dest_len &= ~1;
734
735
0
  ret = convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len, &size);
736
0
  if (ret == false) {
737
0
    return 0;
738
0
  }
739
740
0
  len += size;
741
742
0
  return (ssize_t)len;
743
0
}
744
745
746
/**
747
 Copy a string from a ucs2 source to a unix char* destination.
748
 Flags can have:
749
  STR_TERMINATE means the string in src is null terminated.
750
  STR_NOALIGN   means don't try to align.
751
 if STR_TERMINATE is set then src_len is ignored if it is -1.
752
 src_len is the length of the source area in bytes
753
 Return the number of bytes occupied by the string in src.
754
 The resulting string in "dest" is always null terminated.
755
**/
756
757
static size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
758
0
{
759
0
  size_t size = 0;
760
761
0
  if (ucs2_align(NULL, src, flags)) {
762
0
    src = (const void *)((const char *)src + 1);
763
0
    if (src_len > 0)
764
0
      src_len--;
765
0
  }
766
767
0
  if (flags & STR_TERMINATE) {
768
0
    if (src_len == (size_t)-1) {
769
0
      src_len = utf16_null_terminated_len(src);
770
0
    } else {
771
0
      src_len = utf16_null_terminated_len_n(src, src_len);
772
0
    }
773
0
  }
774
775
  /* ucs2 is always a multiple of 2 bytes */
776
0
  if (src_len != (size_t)-1)
777
0
    src_len &= ~1;
778
779
  /* We're ignoring the return here.. */
780
0
  (void)convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len, &size);
781
0
  if (dest_len)
782
0
    dest[MIN(size, dest_len-1)] = 0;
783
784
0
  return src_len;
785
0
}
786
787
/**
788
 Copy a string from a char* src to a unicode or ascii
789
 dos codepage destination choosing unicode or ascii based on the
790
 flags in the SMB buffer starting at base_ptr.
791
 Return the number of bytes occupied by the string in the destination.
792
 flags can have:
793
  STR_TERMINATE means include the null termination.
794
  STR_UPPER     means uppercase in the destination.
795
  STR_ASCII     use ascii even with unicode packet.
796
  STR_NOALIGN   means don't do alignment.
797
 dest_len is the maximum length allowed in the destination. If dest_len
798
 is -1 then no maximum is used.
799
**/
800
801
_PUBLIC_ ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
802
0
{
803
0
  if (flags & STR_ASCII) {
804
0
    size_t size = 0;
805
0
    if (push_ascii_string(dest, src, dest_len, flags, &size)) {
806
0
      return (ssize_t)size;
807
0
    } else {
808
0
      return (ssize_t)-1;
809
0
    }
810
0
  } else if (flags & STR_UNICODE) {
811
0
    return push_ucs2(dest, src, dest_len, flags);
812
0
  } else {
813
0
    smb_panic("push_string requires either STR_ASCII or STR_UNICODE flag to be set");
814
0
    return -1;
815
0
  }
816
0
}
817
818
819
/**
820
 Copy a string from a unicode or ascii source (depending on
821
 the packet flags) to a char* destination.
822
 Flags can have:
823
  STR_TERMINATE means the string in src is null terminated.
824
  STR_UNICODE   means to force as unicode.
825
  STR_ASCII     use ascii even with unicode packet.
826
  STR_NOALIGN   means don't do alignment.
827
 if STR_TERMINATE is set then src_len is ignored is it is -1
828
 src_len is the length of the source area in bytes.
829
 Return the number of bytes occupied by the string in src.
830
 The resulting string in "dest" is always null terminated.
831
**/
832
833
_PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
834
0
{
835
0
  if (flags & STR_ASCII) {
836
0
    return pull_ascii_string(dest, src, dest_len, src_len, flags);
837
0
  } else if (flags & STR_UNICODE) {
838
0
    return pull_ucs2(dest, src, dest_len, src_len, flags);
839
0
  } else {
840
0
    smb_panic("pull_string requires either STR_ASCII or STR_UNICODE flag to be set");
841
0
    return -1;
842
0
  }
843
0
}