Coverage Report

Created: 2025-12-31 06:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/samba/lib/util/charset/charset_macosxfs.c
Line
Count
Source
1
/*
2
   Unix SMB/CIFS implementation.
3
   Samba charset module for Mac OS X/Darwin
4
   Copyright (C) Benjamin Riefenstahl 2003
5
6
   This program is free software; you can redistribute it and/or modify
7
   it under the terms of the GNU General Public License as published by
8
   the Free Software Foundation; either version 3 of the License, or
9
   (at your option) any later version.
10
11
   This program is distributed in the hope that it will be useful,
12
   but WITHOUT ANY WARRANTY; without even the implied warranty of
13
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
   GNU General Public License for more details.
15
16
   You should have received a copy of the GNU General Public License
17
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
*/
19
20
/*
21
 * modules/charset_macosxfs.c
22
 *
23
 * A Samba charset module to use on Mac OS X/Darwin as the filesystem
24
 * and display encoding.
25
 *
26
 * Actually two implementations are provided here.  The default
27
 * implementation is based on the official CFString API.  The other is
28
 * based on internal CFString APIs as defined in the OpenDarwin
29
 * source.
30
 */
31
32
#include "replace.h"
33
#include "charset.h"
34
#include "charset_proto.h"
35
#include "lib/util/debug.h"
36
#undef realloc
37
38
#ifdef DARWINOS
39
40
/*
41
 * Include OS frameworks.  These are only needed in this module.
42
 */
43
#include <CoreFoundation/CFString.h>
44
45
/*
46
 * See if autoconf has found us the internal headers in some form.
47
 */
48
#if defined(HAVE_COREFOUNDATION_CFSTRINGENCODINGCONVERTER_H)
49
# include <CoreFoundation/CFStringEncodingConverter.h>
50
# include <CoreFoundation/CFUnicodePrecomposition.h>
51
# define USE_INTERNAL_API 1
52
#elif defined(HAVE_CFSTRINGENCODINGCONVERTER_H)
53
# include <CFStringEncodingConverter.h>
54
# include <CFUnicodePrecomposition.h>
55
# define USE_INTERNAL_API 1
56
#endif
57
58
/*
59
 * Compile time configuration: Do we want debug output?
60
 */
61
/* #define DEBUG_STRINGS 1 */
62
63
/*
64
 * A simple, but efficient memory provider for our buffers.
65
 */
66
/*
 * Make sure a heap buffer can hold at least `newsize` bytes.
 *
 *  buffer   current allocation, or NULL if nothing was allocated yet
 *  size     in/out: capacity of `buffer` in bytes
 *  newsize  number of bytes the caller needs
 *
 * The buffer only ever grows; when it grows, 128 spare bytes are added
 * so that slightly larger follow-up requests need no reallocation.
 *
 * Returns the (possibly moved) buffer.  If the request cannot be
 * satisfied, the old buffer is released, *size is reset to 0 and NULL
 * is returned.  Callers use the pattern
 * "buffer = resize_buffer(buffer, ...)", so releasing the old block
 * here is what keeps that pattern from leaking, and resetting *size
 * keeps the cached capacity in sync with the (now absent) buffer.
 */
static inline void *resize_buffer (void *buffer, size_t *size, size_t newsize)
{
  enum { RESIZE_SLACK = 128 };
  void *grown = NULL;

  if (newsize <= *size) {
    /* Already large enough, never shrink. */
    return buffer;
  }

  /* Only attempt the allocation if newsize + slack cannot wrap. */
  if (newsize <= SIZE_MAX - RESIZE_SLACK) {
    grown = realloc(buffer, newsize + RESIZE_SLACK);
  } else {
    errno = ENOMEM;
  }

  if (NULL == grown) {
    free(buffer);
    *size = 0;
    return NULL;
  }

  *size = newsize + RESIZE_SLACK;
  return grown;
}
74
75
/*
76
 * While there is a version of OpenDarwin for intel, the usual case is
77
 * big-endian PPC.  So we need byte swapping to handle the
78
 * little-endian byte order of the network protocol.  We also need an
79
 * additional dynamic buffer to do this work for incoming data blocks,
80
 * because we have to consider the original data as constant.
81
 *
82
 * We abstract the differences away by providing a simple facade with
83
 * these functions/macros:
84
 *
85
 *  le_to_native(dst,src,len)
86
 *  native_to_le(cp,len)
87
 *  set_ucbuffer_with_le(buffer,bufsize,data,size)
88
 *  set_ucbuffer_with_le_copy(buffer,bufsize,data,size,reserve)
89
 */
90
#ifdef WORDS_BIGENDIAN
91
92
/*
 * Copy `len` bytes from `src` to `dst`, exchanging the two bytes of
 * every complete 16-bit unit.
 *
 * If `len` is odd, the final unpaired byte is copied unchanged, so
 * exactly `len` bytes are read and written (the same amount the
 * memcpy() based little-endian variant of le_to_native() copies) and
 * neither buffer is touched past its end.
 */
static inline void swap_bytes (char * dst, const char * src, size_t len)
{
  const size_t paired = len & ~(size_t)1;
  size_t i;

  for (i = 0; i < paired; i += 2) {
    const char first = src[i];
    const char second = src[i + 1];
    dst[i] = second;
    dst[i + 1] = first;
  }

  if (paired < len) {
    dst[paired] = src[paired];
  }
}
102
/*
 * Exchange the two bytes of every complete 16-bit unit in the first
 * `len` bytes of `cp`.
 *
 * If `len` is odd, the final unpaired byte is left untouched, so no
 * byte beyond cp[len-1] is read or written.
 */
static inline void swap_bytes_inplace (char * cp, size_t len)
{
  char *const end = cp + (len & ~(size_t)1);

  for (; cp < end; cp += 2) {
    const char first = cp[0];
    cp[0] = cp[1];
    cp[1] = first;
  }
}
113
114
/*
 * Big-endian host: UTF-16-LE data has to be byte swapped, and incoming
 * data is always copied into our own buffer first.
 */
#define le_to_native(dst,src,len) swap_bytes((dst),(src),(len))
#define native_to_le(cp,len)      swap_bytes_inplace((cp),(len))
#define set_ucbuffer_with_le(buffer,bufsize,data,size) \
  set_ucbuffer_with_le_copy((buffer),(bufsize),(data),(size),0)
118
119
#else /* ! WORDS_BIGENDIAN */
120
121
/*
 * Little-endian host: UTF-16-LE already is the native byte order, so
 * copying is a plain memcpy(), the in-place conversion does nothing and
 * incoming data can be used where it is (no private buffer needed).
 */
#define le_to_native(dst,src,len) memcpy((dst),(src),(len))
#define native_to_le(cp,len)      ((void)0)
#define set_ucbuffer_with_le(buffer,bufsize,data,size) \
  ((void)(bufsize), (UniChar*)(data))
125
126
#endif
127
128
/*
 * Copy `size` bytes of UTF-16-LE `data` into the dynamic buffer,
 * converted to native byte order via le_to_native().
 *
 *  buffer, bufsize  dynamic buffer and its capacity (see resize_buffer())
 *  data, size       source bytes; never modified
 *  reserve          additional bytes of capacity to make available
 *                   behind the copied data
 *
 * Returns the (possibly moved) buffer, or NULL if resize_buffer() could
 * not provide the memory; in that case nothing is copied.
 */
static inline UniChar *set_ucbuffer_with_le_copy (
  UniChar *buffer, size_t *bufsize,
  const void *data, size_t size, size_t reserve)
{
  buffer = resize_buffer(buffer, bufsize, size+reserve);
  if (NULL == buffer) {
    /* Allocation failed: don't write through a NULL pointer. */
    return NULL;
  }
  le_to_native((char*)buffer,data,size);
  return buffer;
}
136
137
138
/*
139
 * A simple hexdump function for debugging error conditions.
140
 */
141
/*
 * Emit `s` at debug level 0.  The text is passed as an argument to a
 * fixed "%s" format, never as the format itself: hexdump() hands us
 * caller supplied labels and dumped data, which may contain '%'.
 */
#define debug_out(s)  DEBUG(0,("%s",(s)))
142
143
#ifdef DEBUG_STRINGS
144
145
/*
 * Write a labelled hex dump of `len` bytes at `s` to the debug log.
 *
 * Output is framed by "<<<<<<<" / ">>>>>>>" lines.  Each row shows the
 * offset, up to 16 bytes in hex (two groups of eight, padded with
 * blanks on a short last row) and the same bytes as text, with a '.'
 * for everything that is not printable 7-bit ASCII.
 */
static void hexdump( const char * label, const char * s, size_t len )
{
  size_t offset = 0;

  debug_out("<<<<<<<\n");
  debug_out(label);
  debug_out("\n");

  while (offset < len) {
    char line[100];
    char *out = line;
    const char *row = s + offset;
    const size_t avail = len - offset;
    const size_t count = (avail < 16) ? avail : 16;
    size_t col;

#undef sprintf
    out += sprintf(out, "%04X ", (unsigned)offset);

    /* Hex columns; an extra blank precedes each group of eight. */
    for (col = 0; col < 16; ++col) {
      if (0 == col % 8) {
        *out++ = ' ';
      }
      if (col < count) {
        out += sprintf(out, "%02X ", ((unsigned)row[col]) & 0xFF);
      } else {
        out += sprintf(out, "   ");
      }
    }
    *out++ = ' ';

    /* Text column. */
    for (col = 0; col < count; ++col) {
      const char c = row[col];
      if (c < ' ' || c >= 0x7F || !isprint(c)) {
        *out++ = '.';
      } else {
        *out++ = c;
      }
    }
    *out++ = '\n';
    *out = 0;

    offset += count;
    debug_out(line);
  }

  debug_out(">>>>>>>\n");
}
186
187
#else /* !DEBUG_STRINGS */
188
189
/* Dumps are compiled out; the arguments are not evaluated. */
#define hexdump(label,s,len) ((void)0)
190
191
#endif
192
193
194
#if !USE_INTERNAL_API
195
196
/*
197
 * An implementation based on documented Mac OS X APIs.
198
 *
199
 * This does a certain amount of memory management, creating and
200
 * manipulating CFString objects.  We try to minimize the impact by
201
 * keeping those objects around and re-using them.  We also use
202
 * external backing store for the CFStrings where this is possible and
203
 * beneficial.
204
 *
205
 * The Unicode normalization forms available at this level are
206
 * generic, not specifically for the file system.  So they may not be
207
 * perfect fits.
208
 */
209
size_t macosxfs_encoding_pull(
210
  void *cd,       /* Encoder handle */
211
  const char **inbuf, size_t *inbytesleft, /* Script string */
212
  char **outbuf, size_t *outbytesleft)  /* UTF-16-LE string */
213
{
214
  static const int script_code = kCFStringEncodingUTF8;
215
  static CFMutableStringRef cfstring = NULL;
216
  size_t outsize;
217
  CFRange range;
218
219
  (void) cd; /* UNUSED */
220
221
  if (0 == *inbytesleft) {
222
    return 0;
223
  }
224
225
  if (NULL == cfstring) {
226
    /*
227
     * A version with an external backing store as in the
228
     * push function should have been more efficient, but
229
     * testing shows, that it is actually slower (!).
230
     * Maybe kCFAllocatorDefault gets shortcut evaluation
231
     * internally, while kCFAllocatorNull doesn't.
232
     */
233
    cfstring = CFStringCreateMutable(kCFAllocatorDefault,0);
234
  }
235
236
  /*
237
   * Three methods of appending to a CFString, choose the most
238
   * efficient.
239
   */
240
  if (0 == (*inbuf)[*inbytesleft-1]) {
241
    CFStringAppendCString(cfstring, *inbuf, script_code);
242
  } else if (*inbytesleft <= 255) {
243
    Str255 buffer;
244
    buffer[0] = *inbytesleft;
245
    memcpy(buffer+1, *inbuf, buffer[0]);
246
    CFStringAppendPascalString(cfstring, buffer, script_code);
247
  } else {
248
    /*
249
     * We would like to use a fixed buffer and a loop
250
     * here, but then we can't guarantee that the input is
251
     * well-formed UTF-8, as we are supposed to do.
252
     */
253
    static char *buffer = NULL;
254
    static size_t buflen = 0;
255
    buffer = resize_buffer(buffer, &buflen, *inbytesleft+1);
256
    memcpy(buffer, *inbuf, *inbytesleft);
257
    buffer[*inbytesleft] = 0;
258
    CFStringAppendCString(cfstring, *inbuf, script_code);
259
  }
260
261
  /*
262
   * Compose characters, using the non-canonical composition
263
   * form.
264
   */
265
  CFStringNormalize(cfstring, kCFStringNormalizationFormC);
266
267
  outsize = CFStringGetLength(cfstring);
268
  range = CFRangeMake(0,outsize);
269
270
  if (outsize == 0) {
271
    /*
272
     * HACK: smbd/mangle_hash2.c:is_legal_name() expects
273
     * errors here.  That function will always pass 2
274
     * characters.  smbd/open.c:check_for_pipe() cuts a
275
     * patchname to 10 characters blindly.  Suppress the
276
     * debug output in those cases.
277
     */
278
    if(2 != *inbytesleft && 10 != *inbytesleft) {
279
      debug_out("String conversion: "
280
          "An unknown error occurred\n");
281
      hexdump("UTF8->UTF16LE (old) input",
282
        *inbuf, *inbytesleft);
283
    }
284
    errno = EILSEQ; /* Not sure, but this is what we have
285
         * actually seen. */
286
    return -1;
287
  }
288
  if (outsize*2 > *outbytesleft) {
289
    CFStringDelete(cfstring, range);
290
    debug_out("String conversion: "
291
        "Output buffer too small\n");
292
    hexdump("UTF8->UTF16LE (old) input",
293
      *inbuf, *inbytesleft);
294
    errno = E2BIG;
295
    return -1;
296
  }
297
298
        CFStringGetCharacters(cfstring, range, (UniChar*)*outbuf);
299
  CFStringDelete(cfstring, range);
300
301
  native_to_le(*outbuf, outsize*2);
302
303
  /*
304
   * Add a converted null byte, if the CFString conversions
305
   * prevented that until now.
306
   */
307
  if (0 == (*inbuf)[*inbytesleft-1] &&
308
      (0 != (*outbuf)[outsize*2-1] || 0 != (*outbuf)[outsize*2-2])) {
309
310
    if ((outsize*2+2) > *outbytesleft) {
311
      debug_out("String conversion: "
312
          "Output buffer too small\n");
313
      hexdump("UTF8->UTF16LE (old) input",
314
        *inbuf, *inbytesleft);
315
      errno = E2BIG;
316
      return -1;
317
    }
318
319
    (*outbuf)[outsize*2] = (*outbuf)[outsize*2+1] = 0;
320
    outsize += 2;
321
  }
322
323
  *inbuf += *inbytesleft;
324
  *inbytesleft = 0;
325
  *outbuf += outsize*2;
326
  *outbytesleft -= outsize*2;
327
328
  return 0;
329
}
330
331
size_t macosxfs_encoding_push(
332
  void *cd,       /* Encoder handle */
333
  const char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */
334
  char **outbuf, size_t *outbytesleft)  /* Script string */
335
{
336
  static const int script_code = kCFStringEncodingUTF8;
337
  static CFMutableStringRef cfstring = NULL;
338
  static UniChar *buffer = NULL;
339
  static size_t buflen = 0;
340
  CFIndex outsize, cfsize, charsconverted;
341
342
  (void) cd; /* UNUSED */
343
344
  if (0 == *inbytesleft) {
345
    return 0;
346
  }
347
348
  /*
349
   * We need a buffer that can hold 4 times the original data,
350
   * because that is the theoretical maximum that decomposition
351
   * can create currently (in Unicode 4.0).
352
   */
353
  buffer = set_ucbuffer_with_le_copy(
354
    buffer, &buflen, *inbuf, *inbytesleft, 3 * *inbytesleft);
355
356
  if (NULL == cfstring) {
357
    cfstring = CFStringCreateMutableWithExternalCharactersNoCopy(
358
      kCFAllocatorDefault,
359
      buffer, *inbytesleft/2, buflen/2,
360
      kCFAllocatorNull);
361
  } else {
362
    CFStringSetExternalCharactersNoCopy(
363
      cfstring,
364
      buffer, *inbytesleft/2, buflen/2);
365
  }
366
367
  /*
368
   * Decompose characters, using the non-canonical decomposition
369
   * form.
370
   *
371
   * NB: This isn't exactly what HFS+ wants (see note on
372
   * kCFStringEncodingUseHFSPlusCanonical in
373
   * CFStringEncodingConverter.h), but AFAIK it's the best that
374
   * the official API can do.
375
   */
376
  CFStringNormalize(cfstring, kCFStringNormalizationFormD);
377
378
  cfsize = CFStringGetLength(cfstring);
379
  charsconverted = CFStringGetBytes(
380
    cfstring, CFRangeMake(0,cfsize),
381
    script_code, 0, false,
382
    *(UInt8 **)outbuf, *outbytesleft, &outsize);
383
384
  if (0 == charsconverted) {
385
    debug_out("String conversion: "
386
        "Buffer too small or not convertible\n");
387
    hexdump("UTF16LE->UTF8 (old) input",
388
      *inbuf, *inbytesleft);
389
    errno = EILSEQ; /* Probably more likely. */
390
    return -1;
391
  }
392
393
  /*
394
   * Add a converted null byte, if the CFString conversions
395
   * prevented that until now.
396
   */
397
  if (0 == (*inbuf)[*inbytesleft-1] && 0 == (*inbuf)[*inbytesleft-2] &&
398
      (0 != (*outbuf)[outsize-1])) {
399
400
    if (((size_t)outsize+1) > *outbytesleft) {
401
      debug_out("String conversion: "
402
          "Output buffer too small\n");
403
      hexdump("UTF16LE->UTF8 (old) input",
404
        *inbuf, *inbytesleft);
405
      errno = E2BIG;
406
      return -1;
407
    }
408
409
    (*outbuf)[outsize] = 0;
410
    ++outsize;
411
  }
412
413
  *inbuf += *inbytesleft;
414
  *inbytesleft = 0;
415
  *outbuf += outsize;
416
  *outbytesleft -= outsize;
417
418
  return 0;
419
}
420
421
#else /* USE_INTERNAL_API */
422
423
/*
424
 * An implementation based on internal code as known from the
425
 * OpenDarwin CVS.
426
 *
427
 * This code doesn't need much memory management because it uses
428
 * functions that operate on the raw memory directly.
429
 *
430
 * The push routine here is faster and more compatible with HFS+ than
431
 * the other implementation above.  The pull routine is only faster
432
 * for some strings, slightly slower for others.  The pull routine
433
 * loses because it has to iterate over the data twice, once to
434
 * decode UTF-8 and then to do the character composition required by
435
 * Windows.
436
 */
437
static size_t macosxfs_encoding_pull(
438
  void *cd,       /* Encoder handle */
439
  const char **inbuf, size_t *inbytesleft, /* Script string */
440
  char **outbuf, size_t *outbytesleft)  /* UTF-16-LE string */
441
{
442
  static const int script_code = kCFStringEncodingUTF8;
443
  UInt32 srcCharsUsed = 0;
444
  UInt32 dstCharsUsed = 0;
445
  UInt32 result;
446
  uint32_t dstDecomposedUsed = 0;
447
  uint32_t dstPrecomposedUsed = 0;
448
449
  (void) cd; /* UNUSED */
450
451
  if (0 == *inbytesleft) {
452
    return 0;
453
  }
454
455
        result = CFStringEncodingBytesToUnicode(
456
    script_code, kCFStringEncodingComposeCombinings,
457
    *inbuf, *inbytesleft, &srcCharsUsed,
458
    (UniChar*)*outbuf, *outbytesleft, &dstCharsUsed);
459
460
  switch(result) {
461
  case kCFStringEncodingConversionSuccess:
462
    if (*inbytesleft == srcCharsUsed) {
463
      break;
464
    }
465
466
    FALL_THROUGH;
467
  case kCFStringEncodingInsufficientOutputBufferLength:
468
    debug_out("String conversion: "
469
        "Output buffer too small\n");
470
    hexdump("UTF8->UTF16LE (new) input",
471
      *inbuf, *inbytesleft);
472
    errno = E2BIG;
473
    return -1;
474
  case kCFStringEncodingInvalidInputStream:
475
    /*
476
     * HACK: smbd/mangle_hash2.c:is_legal_name() expects
477
     * errors here.  That function will always pass 2
478
     * characters.  smbd/open.c:check_for_pipe() cuts a
479
     * patchname to 10 characters blindly.  Suppress the
480
     * debug output in those cases.
481
     */
482
    if(2 != *inbytesleft && 10 != *inbytesleft) {
483
      debug_out("String conversion: "
484
          "Invalid input sequence\n");
485
      hexdump("UTF8->UTF16LE (new) input",
486
        *inbuf, *inbytesleft);
487
    }
488
    errno = EILSEQ;
489
    return -1;
490
  case kCFStringEncodingConverterUnavailable:
491
    debug_out("String conversion: "
492
        "Unknown encoding\n");
493
    hexdump("UTF8->UTF16LE (new) input",
494
      *inbuf, *inbytesleft);
495
    errno = EINVAL;
496
    return -1;
497
  }
498
499
  /*
500
   * It doesn't look like CFStringEncodingBytesToUnicode() can
501
   * produce precomposed characters (flags=ComposeCombinings
502
   * doesn't do it), so we need another pass over the data here.
503
   * We can do this in-place, as the string can only get
504
   * shorter.
505
   *
506
   * (Actually in theory there should be an internal
507
   * decomposition and reordering before the actual composition
508
   * step.  But we should be able to rely on that we always get
509
   * fully decomposed strings for input, so this can't create
510
   * problems in reality.)
511
   */
512
  CFUniCharPrecompose(
513
    (const UTF16Char *)*outbuf, dstCharsUsed, &dstDecomposedUsed,
514
    (UTF16Char *)*outbuf, dstCharsUsed, &dstPrecomposedUsed);
515
516
  native_to_le(*outbuf, dstPrecomposedUsed*2);
517
518
  *inbuf += srcCharsUsed;
519
  *inbytesleft -= srcCharsUsed;
520
  *outbuf += dstPrecomposedUsed*2;
521
  *outbytesleft -= dstPrecomposedUsed*2;
522
523
  return 0;
524
}
525
526
static size_t macosxfs_encoding_push(
527
  void *cd,       /* Encoder handle */
528
  const char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */
529
  char **outbuf, size_t *outbytesleft)  /* Script string */
530
{
531
  static const int script_code = kCFStringEncodingUTF8;
532
  static UniChar *buffer = NULL;
533
  static size_t buflen = 0;
534
  UInt32 srcCharsUsed=0, dstCharsUsed=0, result;
535
536
  (void) cd; /* UNUSED */
537
538
  if (0 == *inbytesleft) {
539
    return 0;
540
  }
541
542
  buffer = set_ucbuffer_with_le(
543
    buffer, &buflen, *inbuf, *inbytesleft);
544
545
  result = CFStringEncodingUnicodeToBytes(
546
    script_code, kCFStringEncodingUseHFSPlusCanonical,
547
    buffer, *inbytesleft/2, &srcCharsUsed,
548
    *outbuf, *outbytesleft, &dstCharsUsed);
549
550
  switch(result) {
551
  case kCFStringEncodingConversionSuccess:
552
    if (*inbytesleft/2 == srcCharsUsed) {
553
      break;
554
    }
555
556
    FALL_THROUGH;
557
  case kCFStringEncodingInsufficientOutputBufferLength:
558
    debug_out("String conversion: "
559
        "Output buffer too small\n");
560
    hexdump("UTF16LE->UTF8 (new) input",
561
      *inbuf, *inbytesleft);
562
    errno = E2BIG;
563
    return -1;
564
  case kCFStringEncodingInvalidInputStream:
565
    /*
566
     * HACK: smbd/open.c:check_for_pipe():is_legal_name()
567
     * cuts a pathname to 10 characters blindly.  Suppress
568
     * the debug output in those cases.
569
     */
570
    if(10 != *inbytesleft) {
571
      debug_out("String conversion: "
572
          "Invalid input sequence\n");
573
      hexdump("UTF16LE->UTF8 (new) input",
574
        *inbuf, *inbytesleft);
575
    }
576
    errno = EILSEQ;
577
    return -1;
578
  case kCFStringEncodingConverterUnavailable:
579
    debug_out("String conversion: "
580
        "Unknown encoding\n");
581
    hexdump("UTF16LE->UTF8 (new) input",
582
      *inbuf, *inbytesleft);
583
    errno = EINVAL;
584
    return -1;
585
  }
586
587
  *inbuf += srcCharsUsed*2;
588
  *inbytesleft -= srcCharsUsed*2;
589
  *outbuf += dstCharsUsed;
590
  *outbytesleft -= dstCharsUsed;
591
592
  return 0;
593
}
594
595
#endif /* USE_INTERNAL_API */
596
597
#else /* DARWIN */
598
599
/*
 * Does nothing.  This is the only definition left in this file when
 * DARWINOS is not defined (presumably so that the translation unit is
 * not empty).
 */
void charset_macosfs_dummy(void);

void charset_macosfs_dummy(void)
{
}
604
605
#endif /* DARWIN */