/src/samba/lib/util/charset/charset_macosxfs.c

Source
/*
   Unix SMB/CIFS implementation.
   Samba charset module for Mac OS X/Darwin
   Copyright (C) Benjamin Riefenstahl 2003

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

/*
 * modules/charset_macosxfs.c
 *
 * A Samba charset module to use on Mac OS X/Darwin as the filesystem
 * and display encoding.
 *
 * Actually two implementations are provided here.  The default
 * implementation is based on the official CFString API.  The other is
 * based on internal CFString APIs as defined in the OpenDarwin
 * source.
 */

#include "replace.h"
#include "charset.h"
#include "charset_proto.h"
#include "lib/util/debug.h"
#undef realloc

#ifdef DARWINOS

/*
 * Include OS frameworks.  These are only needed in this module.
 */
#include <CoreFoundation/CFString.h>

/*
 * See if autoconf has found us the internal headers in some form.
 */
#if defined(HAVE_COREFOUNDATION_CFSTRINGENCODINGCONVERTER_H)
# include <CoreFoundation/CFStringEncodingConverter.h>
# include <CoreFoundation/CFUnicodePrecomposition.h>
# define USE_INTERNAL_API 1
#elif defined(HAVE_CFSTRINGENCODINGCONVERTER_H)
# include <CFStringEncodingConverter.h>
# include <CFUnicodePrecomposition.h>
# define USE_INTERNAL_API 1
#endif

/*
 * Compile time configuration: Do we want debug output?
 */
/* #define DEBUG_STRINGS 1 */

/*
 * A simple grow-only memory provider for our scratch buffers.
 *
 *  buffer   current allocation, may be NULL
 *  size     in/out: capacity of buffer in bytes
 *  newsize  number of bytes the caller needs
 *
 * If newsize already fits into *size the buffer is returned untouched.
 * Otherwise it is reallocated to newsize plus 128 bytes of slack and
 * *size is updated to the new capacity.
 *
 * On failure (size computation would overflow, or out of memory) the
 * old buffer is released, *size is reset to 0 and NULL is returned.
 * Callers store the return value over their only copy of the pointer,
 * so freeing here avoids a leak and keeps pointer and size consistent
 * for the next call.
 */
static inline void *resize_buffer (void *buffer, size_t *size, size_t newsize)
{
  const size_t slack = 128;
  void *grown = NULL;

  if (newsize <= *size) {
    return buffer;
  }

  /* Refuse requests where adding the slack would wrap around. */
  if (newsize <= (size_t)-1 - slack) {
    grown = realloc(buffer, newsize + slack);
  }

  if (NULL == grown) {
    free(buffer);
    *size = 0;
    return NULL;
  }

  /* Only record the new capacity once the allocation succeeded. */
  *size = newsize + slack;
  return grown;
}

/*
 * While there is a version of OpenDarwin for intel, the usual case is
 * big-endian PPC.  So we need byte swapping to handle the
 * little-endian byte order of the network protocol.  We also need an
 * additional dynamic buffer to do this work for incoming data blocks,
 * because we have to consider the original data as constant.
 *
 * We abstract the differences away by providing a simple facade with
 * these functions/macros:
 *
 *  le_to_native(dst,src,len)
 *  native_to_le(cp,len)
 *  set_ucbuffer_with_le(buffer,bufsize,data,size)
 *  set_ucbuffer_with_le_copy(buffer,bufsize,data,size,reserve)
 */
#ifdef WORDS_BIGENDIAN

/*
 * Copy len bytes from src to dst, exchanging the two bytes of every
 * 16-bit unit on the way.  len is expected to be even; the loop always
 * handles whole pairs.
 */
static inline void swap_bytes (char * dst, const char * src, size_t len)
{
  size_t pos;

  for (pos = 0; pos < len; pos += 2) {
    dst[pos] = src[pos + 1];
    dst[pos + 1] = src[pos];
  }
}
/*
 * Exchange the two bytes of every 16-bit unit in the len bytes at cp,
 * modifying the data in place.  len is expected to be even; the loop
 * always handles whole pairs.
 */
static inline void swap_bytes_inplace (char * cp, size_t len)
{
  size_t pos;

  for (pos = 0; pos < len; pos += 2) {
    const char first = cp[pos];
    cp[pos] = cp[pos + 1];
    cp[pos + 1] = first;
  }
}

/*
 * Big-endian variant of the facade: both directions are byte swaps,
 * and set_ucbuffer_with_le() always copies the data into the scratch
 * buffer (with a reserve of 0 bytes).
 */
#define le_to_native(dst,src,len) swap_bytes(dst,src,len)
#define native_to_le(cp,len)    swap_bytes_inplace(cp,len)
#define set_ucbuffer_with_le(buffer,bufsize,data,size) \
  set_ucbuffer_with_le_copy(buffer,bufsize,data,size,0)

#else /* ! WORDS_BIGENDIAN */

/*
 * Little-endian variant of the facade: le_to_native() is a plain copy,
 * native_to_le() expands to nothing, and set_ucbuffer_with_le() does
 * not copy at all -- it yields the caller's data pointer cast to
 * UniChar*.  The buffer argument is not used and bufsize is only
 * evaluated and discarded.
 */
#define le_to_native(dst,src,len) memcpy(dst,src,len)
#define native_to_le(cp,len)    /* nothing */
#define set_ucbuffer_with_le(buffer,bufsize,data,size) \
  (((void)(bufsize)),(UniChar*)(data))

#endif

/*
 * Copy little-endian UTF-16 data into a (possibly regrown) scratch
 * buffer, converting it to native byte order.
 *
 *  buffer   current scratch buffer, may be NULL
 *  bufsize  in/out: capacity of buffer in bytes
 *  data     little-endian input
 *  size     number of input bytes to copy
 *  reserve  additional bytes of capacity to request beyond size
 *
 * Returns the buffer to use from now on (it may have moved), or NULL
 * if the buffer could not be grown.  In the failure case nothing is
 * copied and *bufsize is reset to 0 so that a later call starts from a
 * consistent empty state.
 */
static inline UniChar *set_ucbuffer_with_le_copy (
  UniChar *buffer, size_t *bufsize,
  const void *data, size_t size, size_t reserve)
{
  buffer = resize_buffer(buffer, bufsize, size+reserve);
  if (NULL == buffer) {
    /* Never copy through a failed allocation. */
    *bufsize = 0;
    return NULL;
  }
  le_to_native((char*)buffer,data,size);
  return buffer;
}


/*
 * A simple hexdump function for debugging error conditions.
 */
/*
 * Emit a preformatted string at debug level 0.  The text is handed to
 * a fixed "%s" format instead of being used as the format itself:
 * hexdump() passes lines that contain printable bytes of the input
 * being dumped, and those may include '%'.
 */
#define debug_out(s)  DEBUG(0,("%s",(s)))

#ifdef DEBUG_STRINGS

/*
 * Write a labelled hex/ASCII dump of the len bytes at s via debug_out().
 *
 * The dump is framed by "<<<<<<<" and ">>>>>>>" lines.  Each row shows
 * the offset, up to 16 bytes in hex (in two groups of eight, padded
 * with blanks on a short last row) and the same bytes as characters,
 * with everything outside printable ASCII shown as '.'.
 */
static void hexdump( const char * label, const char * s, size_t len )
{
  size_t offset = 0;

  debug_out("<<<<<<<\n");
  debug_out(label);
  debug_out("\n");

  while (offset < len) {
    char line[100];
    char *out = line;
    const char *row = s + offset;
    const size_t remaining = len - offset;
    const size_t count = (remaining < 16) ? remaining : 16;
    size_t col;

#undef sprintf
    out += sprintf(out, "%04X ", (unsigned)offset);
    *out++ = ' ';

    /* Hex columns; an extra blank separates the two groups of eight. */
    for (col = 0; col < 16; ++col) {
      if (8 == col) {
        *out++ = ' ';
      }
      if (col < count) {
        out += sprintf(out, "%02X ", ((unsigned)row[col]) & 0xFF);
      } else {
        out += sprintf(out, "   ");
      }
    }
    *out++ = ' ';

    /* Character column. */
    for (col = 0; col < count; ++col) {
      const char c = row[col];
      if (c < ' ' || c >= 0x7F || !isprint(c)) {
        *out++ = '.';
      } else {
        *out++ = c;
      }
    }
    *out++ = '\n';
    *out = 0;

    offset += count;
    debug_out(line);
  }

  debug_out(">>>>>>>\n");
}

#else /* !DEBUG_STRINGS */

/* Without DEBUG_STRINGS every hexdump() call expands to nothing. */
#define hexdump(label,s,len) /* nothing */

#endif


#if !USE_INTERNAL_API

/*
 * An implementation based on documented Mac OS X APIs.
 *
 * This does a certain amount of memory management, creating and
 * manipulating CFString objects.  We try to minimize the impact by
 * keeping those objects around and re-using them.  We also use
 * external backing store for the CFStrings where this is possible and
 * beneficial.
 *
 * The Unicode normalizations forms available at this level are
 * generic, not specifically for the file system.  So they may not be
 * perfect fits.
 */
/*
 * Convert a UTF-8 ("script") string to precomposed UTF-16-LE using the
 * documented CFString API.
 *
 *  cd            encoder handle, unused
 *  inbuf         in/out: start of the UTF-8 input; advanced past the
 *                input on success
 *  inbytesleft   in/out: number of input bytes; set to 0 on success
 *  outbuf        in/out: start of the output area; advanced past the
 *                bytes written on success
 *  outbytesleft  in/out: room in the output area in bytes; reduced by
 *                the bytes written on success
 *
 * The input is always converted as a whole.  Returns 0 on success.  On
 * failure returns (size_t)-1 and sets errno, leaving all four in/out
 * arguments unchanged:
 *
 *  EILSEQ  the conversion produced no characters
 *  E2BIG   the output area is too small
 *  ENOMEM  a working object or buffer could not be allocated
 *
 * The function keeps its working CFString and copy buffer in static
 * variables, so concurrent calls would share them.
 */
size_t macosxfs_encoding_pull(
  void *cd,       /* Encoder handle */
  const char **inbuf, size_t *inbytesleft, /* Script string */
  char **outbuf, size_t *outbytesleft)  /* UTF-16-LE string */
{
  static const int script_code = kCFStringEncodingUTF8;
  static CFMutableStringRef cfstring = NULL;
  size_t outsize; /* counted in UTF-16 units, not bytes */
  CFRange range;

  (void) cd; /* UNUSED */

  if (0 == *inbytesleft) {
    return 0;
  }

  if (NULL == cfstring) {
    /*
     * A version with an external backing store as in the
     * push function should have been more efficient, but
     * testing shows, that it is actually slower (!).
     * Maybe kCFAllocatorDefault gets shortcut evaluation
     * internally, while kCFAllocatorNull doesn't.
     */
    cfstring = CFStringCreateMutable(kCFAllocatorDefault,0);
    if (NULL == cfstring) {
      errno = ENOMEM;
      return -1;
    }
  }

  /*
   * Three methods of appending to a CFString, choose the most
   * efficient.
   */
  if (0 == (*inbuf)[*inbytesleft-1]) {
    /* Input is already NUL-terminated, use it directly. */
    CFStringAppendCString(cfstring, *inbuf, script_code);
  } else if (*inbytesleft <= 255) {
    /* Short unterminated input fits a length-prefixed string. */
    Str255 buffer;
    buffer[0] = *inbytesleft;
    memcpy(buffer+1, *inbuf, buffer[0]);
    CFStringAppendPascalString(cfstring, buffer, script_code);
  } else {
    /*
     * We would like to use a fixed buffer and a loop
     * here, but then we can't guarantee that the input is
     * well-formed UTF-8, as we are supposed to do.
     */
    static char *buffer = NULL;
    static size_t buflen = 0;
    buffer = resize_buffer(buffer, &buflen, *inbytesleft+1);
    if (NULL == buffer) {
      buflen = 0;
      errno = ENOMEM;
      return -1;
    }
    memcpy(buffer, *inbuf, *inbytesleft);
    buffer[*inbytesleft] = 0;
    /*
     * Append the terminated copy; the original input has no
     * NUL within its bounds in this branch.
     */
    CFStringAppendCString(cfstring, buffer, script_code);
  }

  /*
   * Compose characters, using the non-canonical composition
   * form.
   */
  CFStringNormalize(cfstring, kCFStringNormalizationFormC);

  outsize = CFStringGetLength(cfstring);
  range = CFRangeMake(0,outsize);

  if (outsize == 0) {
    /*
     * HACK: smbd/mangle_hash2.c:is_legal_name() expects
     * errors here.  That function will always pass 2
     * characters.  smbd/open.c:check_for_pipe() cuts a
     * pathname to 10 characters blindly.  Suppress the
     * debug output in those cases.
     */
    if(2 != *inbytesleft && 10 != *inbytesleft) {
      debug_out("String conversion: "
          "An unknown error occurred\n");
      hexdump("UTF8->UTF16LE (old) input",
        *inbuf, *inbytesleft);
    }
    errno = EILSEQ; /* Not sure, but this is what we have
         * actually seen. */
    return -1;
  }
  if (outsize*2 > *outbytesleft) {
    /* Leave the shared CFString empty for the next call. */
    CFStringDelete(cfstring, range);
    debug_out("String conversion: "
        "Output buffer too small\n");
    hexdump("UTF8->UTF16LE (old) input",
      *inbuf, *inbytesleft);
    errno = E2BIG;
    return -1;
  }

  CFStringGetCharacters(cfstring, range, (UniChar*)*outbuf);
  CFStringDelete(cfstring, range);

  native_to_le(*outbuf, outsize*2);

  /*
   * Add a converted null byte, if the CFString conversions
   * prevented that until now.
   */
  if (0 == (*inbuf)[*inbytesleft-1] &&
      (0 != (*outbuf)[outsize*2-1] || 0 != (*outbuf)[outsize*2-2])) {

    if ((outsize*2+2) > *outbytesleft) {
      debug_out("String conversion: "
          "Output buffer too small\n");
      hexdump("UTF8->UTF16LE (old) input",
        *inbuf, *inbytesleft);
      errno = E2BIG;
      return -1;
    }

    (*outbuf)[outsize*2] = (*outbuf)[outsize*2+1] = 0;
    /* One more UTF-16 unit, i.e. the two bytes just written. */
    outsize += 1;
  }

  *inbuf += *inbytesleft;
  *inbytesleft = 0;
  *outbuf += outsize*2;
  *outbytesleft -= outsize*2;

  return 0;
}

/*
 * Convert a UTF-16-LE string to decomposed UTF-8 ("script") using the
 * documented CFString API.
 *
 *  cd            encoder handle, unused
 *  inbuf         in/out: start of the UTF-16-LE input; advanced past
 *                the input on success
 *  inbytesleft   in/out: number of input bytes; set to 0 on success
 *  outbuf        in/out: start of the output area; advanced past the
 *                bytes written on success
 *  outbytesleft  in/out: room in the output area in bytes; reduced by
 *                the bytes written on success
 *
 * The input is always converted as a whole.  Returns 0 on success.  On
 * failure returns (size_t)-1 and sets errno, leaving all four in/out
 * arguments unchanged:
 *
 *  EILSEQ  the string cannot be converted completely, whatever the
 *          size of the output area
 *  E2BIG   the output area is too small
 *  ENOMEM  a working object or buffer could not be allocated
 *
 * The function keeps its working CFString and character buffer in
 * static variables, so concurrent calls would share them.
 */
size_t macosxfs_encoding_push(
  void *cd,       /* Encoder handle */
  const char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */
  char **outbuf, size_t *outbytesleft)  /* Script string */
{
  static const int script_code = kCFStringEncodingUTF8;
  static CFMutableStringRef cfstring = NULL;
  static UniChar *buffer = NULL;
  static size_t buflen = 0;
  CFIndex outsize = 0, cfsize, charsconverted;

  (void) cd; /* UNUSED */

  if (0 == *inbytesleft) {
    return 0;
  }

  /*
   * We need a buffer that can hold 4 times the original data,
   * because that is the theoretical maximum that decomposition
   * can create currently (in Unicode 4.0).
   */
  buffer = set_ucbuffer_with_le_copy(
    buffer, &buflen, *inbuf, *inbytesleft, 3 * *inbytesleft);
  if (NULL == buffer) {
    buflen = 0;
    errno = ENOMEM;
    return -1;
  }

  /*
   * The CFString works directly on our buffer.  It has to be
   * pointed at the buffer again on every call, because the
   * buffer may have moved and the content length has changed.
   */
  if (NULL == cfstring) {
    cfstring = CFStringCreateMutableWithExternalCharactersNoCopy(
      kCFAllocatorDefault,
      buffer, *inbytesleft/2, buflen/2,
      kCFAllocatorNull);
    if (NULL == cfstring) {
      errno = ENOMEM;
      return -1;
    }
  } else {
    CFStringSetExternalCharactersNoCopy(
      cfstring,
      buffer, *inbytesleft/2, buflen/2);
  }

  /*
   * Decompose characters, using the non-canonical decomposition
   * form.
   *
   * NB: This isn't exactly what HFS+ wants (see note on
   * kCFStringEncodingUseHFSPlusCanonical in
   * CFStringEncodingConverter.h), but AFAIK it's the best that
   * the official API can do.
   */
  CFStringNormalize(cfstring, kCFStringNormalizationFormD);

  cfsize = CFStringGetLength(cfstring);
  charsconverted = CFStringGetBytes(
    cfstring, CFRangeMake(0,cfsize),
    script_code, 0, false,
    *(UInt8 **)outbuf, *outbytesleft, &outsize);

  /*
   * Anything short of the full string is a failure; reporting
   * success here would silently drop the tail of the input.
   */
  if (0 == charsconverted || charsconverted < cfsize) {
    /*
     * Tell the two causes apart with a dry run that has no
     * output buffer: if every character is convertible then,
     * the only problem was the size of the caller's buffer.
     */
    if (cfsize > 0 &&
        cfsize == CFStringGetBytes(
          cfstring, CFRangeMake(0,cfsize),
          script_code, 0, false,
          NULL, 0, NULL)) {
      errno = E2BIG;
    } else {
      errno = EILSEQ;
    }
    debug_out("String conversion: "
        "Buffer too small or not convertible\n");
    hexdump("UTF16LE->UTF8 (old) input",
      *inbuf, *inbytesleft);
    return -1;
  }

  /*
   * Add a converted null byte, if the CFString conversions
   * prevented that until now.
   */
  if (0 == (*inbuf)[*inbytesleft-1] && 0 == (*inbuf)[*inbytesleft-2] &&
      (0 != (*outbuf)[outsize-1])) {

    if (((size_t)outsize+1) > *outbytesleft) {
      debug_out("String conversion: "
          "Output buffer too small\n");
      hexdump("UTF16LE->UTF8 (old) input",
        *inbuf, *inbytesleft);
      errno = E2BIG;
      return -1;
    }

    (*outbuf)[outsize] = 0;
    ++outsize;
  }

  *inbuf += *inbytesleft;
  *inbytesleft = 0;
  *outbuf += outsize;
  *outbytesleft -= outsize;

  return 0;
}

#else /* USE_INTERNAL_API */

/*
 * An implementation based on internal code as known from the
 * OpenDarwin CVS.
 *
 * This code doesn't need much memory management because it uses
 * functions that operate on the raw memory directly.
 *
 * The push routine here is faster and more compatible with HFS+ than
 * the other implementation above.  The pull routine is only faster
 * for some strings, slightly slower for others.  The pull routine
 * loses because it has to iterate over the data twice, once to
 * decode UTF-8 and then to do the character composition required by
 * Windows.
 */
/*
 * Convert a UTF-8 ("script") string to precomposed UTF-16-LE using the
 * internal CoreFoundation converter.
 *
 *  cd            encoder handle, unused
 *  inbuf         in/out: start of the UTF-8 input; advanced by the
 *                bytes consumed on success
 *  inbytesleft   in/out: number of input bytes; reduced by the bytes
 *                consumed on success
 *  outbuf        in/out: start of the output area; advanced past the
 *                bytes written on success
 *  outbytesleft  in/out: room in the output area in bytes; reduced by
 *                the bytes written on success
 *
 * Returns 0 on success.  On failure returns (size_t)-1 and sets errno
 * without updating the in/out arguments:
 *
 *  E2BIG   the output area is too small, or the converter stopped
 *          before the end of the input
 *  EILSEQ  the input is not valid
 *  EINVAL  the converter for the encoding is not available
 */
static size_t macosxfs_encoding_pull(
  void *cd,       /* Encoder handle */
  const char **inbuf, size_t *inbytesleft, /* Script string */
  char **outbuf, size_t *outbytesleft)  /* UTF-16-LE string */
{
  static const int script_code = kCFStringEncodingUTF8;
  UInt32 srcCharsUsed = 0;
  UInt32 dstCharsUsed = 0;
  UInt32 result;
  uint32_t dstDecomposedUsed = 0;
  uint32_t dstPrecomposedUsed = 0;

  (void) cd; /* UNUSED */

  if (0 == *inbytesleft) {
    return 0;
  }

  /*
   * The converter takes the output capacity as a number of
   * UniChars, while *outbytesleft counts bytes -- hence the
   * division by two.
   */
  result = CFStringEncodingBytesToUnicode(
    script_code, kCFStringEncodingComposeCombinings,
    *inbuf, *inbytesleft, &srcCharsUsed,
    (UniChar*)*outbuf, *outbytesleft/2, &dstCharsUsed);

  switch(result) {
  case kCFStringEncodingConversionSuccess:
    if (*inbytesleft == srcCharsUsed) {
      break;
    }

    /* Input not fully consumed: handle like a short buffer. */
    FALL_THROUGH;
  case kCFStringEncodingInsufficientOutputBufferLength:
    debug_out("String conversion: "
        "Output buffer too small\n");
    hexdump("UTF8->UTF16LE (new) input",
      *inbuf, *inbytesleft);
    errno = E2BIG;
    return -1;
  case kCFStringEncodingInvalidInputStream:
    /*
     * HACK: smbd/mangle_hash2.c:is_legal_name() expects
     * errors here.  That function will always pass 2
     * characters.  smbd/open.c:check_for_pipe() cuts a
     * pathname to 10 characters blindly.  Suppress the
     * debug output in those cases.
     */
    if(2 != *inbytesleft && 10 != *inbytesleft) {
      debug_out("String conversion: "
          "Invalid input sequence\n");
      hexdump("UTF8->UTF16LE (new) input",
        *inbuf, *inbytesleft);
    }
    errno = EILSEQ;
    return -1;
  case kCFStringEncodingConverterUnavailable:
    debug_out("String conversion: "
        "Unknown encoding\n");
    hexdump("UTF8->UTF16LE (new) input",
      *inbuf, *inbytesleft);
    errno = EINVAL;
    return -1;
  }

  /*
   * It doesn't look like CFStringEncodingBytesToUnicode() can
   * produce precomposed characters (flags=ComposeCombinings
   * doesn't do it), so we need another pass over the data here.
   * We can do this in-place, as the string can only get
   * shorter.
   *
   * (Actually in theory there should be an internal
   * decomposition and reordering before the actual composition
   * step.  But we should be able to rely on that we always get
   * fully decomposed strings for input, so this can't create
   * problems in reality.)
   */
  CFUniCharPrecompose(
    (const UTF16Char *)*outbuf, dstCharsUsed, &dstDecomposedUsed,
    (UTF16Char *)*outbuf, dstCharsUsed, &dstPrecomposedUsed);

  native_to_le(*outbuf, dstPrecomposedUsed*2);

  *inbuf += srcCharsUsed;
  *inbytesleft -= srcCharsUsed;
  *outbuf += dstPrecomposedUsed*2;
  *outbytesleft -= dstPrecomposedUsed*2;

  return 0;
}

/*
 * Convert a UTF-16-LE string to UTF-8 ("script") using the internal
 * CoreFoundation converter with the HFS+ canonical flag.
 *
 * On success the in/out arguments are moved forward by the input
 * consumed and the output produced, and 0 is returned.  On failure
 * (size_t)-1 is returned with errno set to E2BIG (output too small or
 * input not fully consumed), EILSEQ (invalid input) or EINVAL
 * (converter unavailable), and the arguments are left alone.
 */
static size_t macosxfs_encoding_push(
  void *cd,       /* Encoder handle */
  const char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */
  char **outbuf, size_t *outbytesleft)  /* Script string */
{
  static const int script_code = kCFStringEncodingUTF8;
  static UniChar *native = NULL;
  static size_t native_len = 0;
  UInt32 consumed = 0;
  UInt32 produced = 0;
  UInt32 status;

  (void) cd; /* UNUSED */

  if (0 == *inbytesleft) {
    return 0;
  }

  /* Get the input as native-endian UniChars. */
  native = set_ucbuffer_with_le(
    native, &native_len, *inbuf, *inbytesleft);

  status = CFStringEncodingUnicodeToBytes(
    script_code, kCFStringEncodingUseHFSPlusCanonical,
    native, *inbytesleft/2, &consumed,
    *outbuf, *outbytesleft, &produced);

  /*
   * A "success" that did not consume all of the input is treated
   * exactly like an explicit short-buffer result.
   */
  if (kCFStringEncodingInsufficientOutputBufferLength == status ||
      (kCFStringEncodingConversionSuccess == status &&
       *inbytesleft/2 != consumed)) {
    debug_out("String conversion: "
        "Output buffer too small\n");
    hexdump("UTF16LE->UTF8 (new) input",
      *inbuf, *inbytesleft);
    errno = E2BIG;
    return -1;
  }

  if (kCFStringEncodingInvalidInputStream == status) {
    /*
     * HACK: smbd/open.c:check_for_pipe():is_legal_name()
     * cuts a pathname to 10 characters blindly.  Suppress
     * the debug output in those cases.
     */
    if(10 != *inbytesleft) {
      debug_out("String conversion: "
          "Invalid input sequence\n");
      hexdump("UTF16LE->UTF8 (new) input",
        *inbuf, *inbytesleft);
    }
    errno = EILSEQ;
    return -1;
  }

  if (kCFStringEncodingConverterUnavailable == status) {
    debug_out("String conversion: "
        "Unknown encoding\n");
    hexdump("UTF16LE->UTF8 (new) input",
      *inbuf, *inbytesleft);
    errno = EINVAL;
    return -1;
  }

  *inbuf += consumed*2;
  *inbytesleft -= consumed*2;
  *outbuf += produced;
  *outbytesleft -= produced;

  return 0;
}

#endif /* USE_INTERNAL_API */

#else /* DARWIN */

/*
 * Empty function compiled when DARWINOS is not defined; presumably it
 * only exists so that this file still defines a symbol on other
 * platforms.
 */
void charset_macosfs_dummy(void);

void charset_macosfs_dummy(void)
{
}

#endif /* DARWIN */

Coverage Report

Created: 2025-12-31 06:20

Line	Count	Source
1		/*
2		Unix SMB/CIFS implementation.
3		Samba charset module for Mac OS X/Darwin
4		Copyright (C) Benjamin Riefenstahl 2003
5
6		This program is free software; you can redistribute it and/or modify
7		it under the terms of the GNU General Public License as published by
8		the Free Software Foundation; either version 3 of the License, or
9		(at your option) any later version.
10
11		This program is distributed in the hope that it will be useful,
12		but WITHOUT ANY WARRANTY; without even the implied warranty of
13		MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		GNU General Public License for more details.
15
16		You should have received a copy of the GNU General Public License
17		along with this program. If not, see <http://www.gnu.org/licenses/>.
18		*/
19
20		/*
21		* modules/charset_macosxfs.c
22		*
23		* A Samba charset module to use on Mac OS X/Darwin as the filesystem
24		* and display encoding.
25		*
26		* Actually two implementations are provided here. The default
27		* implementation is based on the official CFString API. The other is
28		* based on internal CFString APIs as defined in the OpenDarwin
29		* source.
30		*/
31
32		#include "replace.h"
33		#include "charset.h"
34		#include "charset_proto.h"
35		#include "lib/util/debug.h"
36		#undef realloc
37
38		#ifdef DARWINOS
39
40		/*
41		* Include OS frameworks. These are only needed in this module.
42		*/
43		#include <CoreFoundation/CFString.h>
44
45		/*
46		* See if autoconf has found us the internal headers in some form.
47		*/
48		#if defined(HAVE_COREFOUNDATION_CFSTRINGENCODINGCONVERTER_H)
49		# include <CoreFoundation/CFStringEncodingConverter.h>
50		# include <CoreFoundation/CFUnicodePrecomposition.h>
51		# define USE_INTERNAL_API 1
52		#elif defined(HAVE_CFSTRINGENCODINGCONVERTER_H)
53		# include <CFStringEncodingConverter.h>
54		# include <CFUnicodePrecomposition.h>
55		# define USE_INTERNAL_API 1
56		#endif
57
58		/*
59		* Compile time configuration: Do we want debug output?
60		*/
61		/* #define DEBUG_STRINGS 1 */
62
63		/*
64		* A simple, but efficient memory provider for our buffers.
65		*/
66		static inline void resize_buffer (void buffer, size_t *size, size_t newsize)
67		{
68		if (newsize > *size) {
69		*size = newsize + 128;
70		buffer = realloc(buffer, *size);
71		}
72		return buffer;
73		}
74
75		/*
76		* While there is a version of OpenDarwin for intel, the usual case is
77		* big-endian PPC. So we need byte swapping to handle the
78		* little-endian byte order of the network protocol. We also need an
79		* additional dynamic buffer to do this work for incoming data blocks,
80		* because we have to consider the original data as constant.
81		*
82		* We abstract the differences away by providing a simple facade with
83		* these functions/macros:
84		*
85		* le_to_native(dst,src,len)
86		* native_to_le(cp,len)
87		* set_ucbuffer_with_le(buffer,bufsize,data,size)
88		* set_ucbuffer_with_le_copy(buffer,bufsize,data,size,reserve)
89		*/
90		#ifdef WORDS_BIGENDIAN
91
92		static inline void swap_bytes (char * dst, const char * src, size_t len)
93		{
94		const char *srcend = src + len;
95		while (src < srcend) {
96		dst[0] = src[1];
97		dst[1] = src[0];
98		dst += 2;
99		src += 2;
100		}
101		}
102		static inline void swap_bytes_inplace (char * cp, size_t len)
103		{
104		char temp;
105		char *end = cp + len;
106		while (cp < end) {
107		temp = cp[1];
108		cp[1] = cp[0];
109		cp[0] = temp;
110		cp += 2;
111		}
112		}
113
114		#define le_to_native(dst,src,len) swap_bytes(dst,src,len)
115		#define native_to_le(cp,len) swap_bytes_inplace(cp,len)
116		#define set_ucbuffer_with_le(buffer,bufsize,data,size) \
117		set_ucbuffer_with_le_copy(buffer,bufsize,data,size,0)
118
119		#else /* ! WORDS_BIGENDIAN */
120
121		#define le_to_native(dst,src,len) memcpy(dst,src,len)
122		#define native_to_le(cp,len) /* nothing */
123		#define set_ucbuffer_with_le(buffer,bufsize,data,size) \
124		(((void)(bufsize)),(UniChar*)(data))
125
126		#endif
127
128		static inline UniChar *set_ucbuffer_with_le_copy (
129		UniChar buffer, size_t bufsize,
130		const void *data, size_t size, size_t reserve)
131		{
132		buffer = resize_buffer(buffer, bufsize, size+reserve);
133		le_to_native((char*)buffer,data,size);
134		return buffer;
135		}
136
137
138		/*
139		* A simple hexdump function for debugging error conditions.
140		*/
141		#define debug_out(s) DEBUG(0,(s))
142
143		#ifdef DEBUG_STRINGS
144
145		static void hexdump( const char * label, const char * s, size_t len )
146		{
147		size_t restlen = len;
148		debug_out("<<<<<<<\n");
149		debug_out(label);
150		debug_out("\n");
151		while (restlen > 0) {
152		char line[100];
153		size_t i, j;
154		char * d = line;
155		#undef sprintf
156		d += sprintf(d, "%04X ", (unsigned)(len-restlen));
157		*d++ = ' ';
158		for( i = 0; i<restlen && i<8; ++i ) {
159		d += sprintf(d, "%02X ", ((unsigned)s[i]) & 0xFF);
160		}
161		for( j = i; j<8; ++j ) {
162		d += sprintf(d, " ");
163		}
164		*d++ = ' ';
165		for( i = 8; i<restlen && i<16; ++i ) {
166		d += sprintf(d, "%02X ", ((unsigned)s[i]) & 0xFF);
167		}
168		for( j = i; j<16; ++j ) {
169		d += sprintf(d, " ");
170		}
171		*d++ = ' ';
172		for( i = 0; i<restlen && i<16; ++i ) {
173		if(s[i] < ' ' \|\| s[i] >= 0x7F \|\| !isprint(s[i]))
174		*d++ = '.';
175		else
176		*d++ = s[i];
177		}
178		*d++ = '\n';
179		*d = 0;
180		restlen -= i;
181		s += i;
182		debug_out(line);
183		}
184		debug_out(">>>>>>>\n");
185		}
186
187		#else /* !DEBUG_STRINGS */
188
189		#define hexdump(label,s,len) /* nothing */
190
191		#endif
192
193
194		#if !USE_INTERNAL_API
195
196		/*
197		* An implementation based on documented Mac OS X APIs.
198		*
199		* This does a certain amount of memory management, creating and
200		* manipulating CFString objects. We try to minimize the impact by
201		* keeping those objects around and re-using them. We also use
202		* external backing store for the CFStrings where this is possible and
203		* benficial.
204		*
205		* The Unicode normalizations forms available at this level are
206		* generic, not specifically for the file system. So they may not be
207		* perfect fits.
208		*/
209		size_t macosxfs_encoding_pull(
210		void cd, / Encoder handle */
211		const char *inbuf, size_t inbytesleft, /* Script string */
212		char *outbuf, size_t outbytesleft) /* UTF-16-LE string */
213		{
214		static const int script_code = kCFStringEncodingUTF8;
215		static CFMutableStringRef cfstring = NULL;
216		size_t outsize;
217		CFRange range;
218
219		(void) cd; /* UNUSED */
220
221		if (0 == *inbytesleft) {
222		return 0;
223		}
224
225		if (NULL == cfstring) {
226		/*
227		* A version with an external backing store as in the
228		* push function should have been more efficient, but
229		* testing shows, that it is actually slower (!).
230		* Maybe kCFAllocatorDefault gets shortcut evaluation
231		* internally, while kCFAllocatorNull doesn't.
232		*/
233		cfstring = CFStringCreateMutable(kCFAllocatorDefault,0);
234		}
235
236		/*
237		* Three methods of appending to a CFString, choose the most
238		* efficient.
239		*/
240		if (0 == (inbuf)[inbytesleft-1]) {
241		CFStringAppendCString(cfstring, *inbuf, script_code);
242		} else if (*inbytesleft <= 255) {
243		Str255 buffer;
244		buffer[0] = *inbytesleft;
245		memcpy(buffer+1, *inbuf, buffer[0]);
246		CFStringAppendPascalString(cfstring, buffer, script_code);
247		} else {
248		/*
249		* We would like to use a fixed buffer and a loop
250		* here, but then we can't guarantee that the input is
251		* well-formed UTF-8, as we are supposed to do.
252		*/
253		static char *buffer = NULL;
254		static size_t buflen = 0;
255		buffer = resize_buffer(buffer, &buflen, *inbytesleft+1);
256		memcpy(buffer, inbuf, inbytesleft);
257		buffer[*inbytesleft] = 0;
258		CFStringAppendCString(cfstring, *inbuf, script_code);
259		}
260
261		/*
262		* Compose characters, using the non-canonical composition
263		* form.
264		*/
265		CFStringNormalize(cfstring, kCFStringNormalizationFormC);
266
267		outsize = CFStringGetLength(cfstring);
268		range = CFRangeMake(0,outsize);
269
270		if (outsize == 0) {
271		/*
272		* HACK: smbd/mangle_hash2.c:is_legal_name() expects
273		* errors here. That function will always pass 2
274		* characters. smbd/open.c:check_for_pipe() cuts a
275		* patchname to 10 characters blindly. Suppress the
276		* debug output in those cases.
277		*/
278		if(2 != inbytesleft && 10 != inbytesleft) {
279		debug_out("String conversion: "
280		"An unknown error occurred\n");
281		hexdump("UTF8->UTF16LE (old) input",
282		inbuf, inbytesleft);
283		}
284		errno = EILSEQ; /* Not sure, but this is what we have
285		* actually seen. */
286		return -1;
287		}
288		if (outsize2 > outbytesleft) {
289		CFStringDelete(cfstring, range);
290		debug_out("String conversion: "
291		"Output buffer too small\n");
292		hexdump("UTF8->UTF16LE (old) input",
293		inbuf, inbytesleft);
294		errno = E2BIG;
295		return -1;
296		}
297
298		CFStringGetCharacters(cfstring, range, (UniChar)outbuf);
299		CFStringDelete(cfstring, range);
300
301		native_to_le(outbuf, outsize2);
302
303		/*
304		* Add a converted null byte, if the CFString conversions
305		* prevented that until now.
306		*/
307		if (0 == (inbuf)[inbytesleft-1] &&
308		(0 != (outbuf)[outsize2-1] \|\| 0 != (outbuf)[outsize2-2])) {
309
310		if ((outsize2+2) > outbytesleft) {
311		debug_out("String conversion: "
312		"Output buffer too small\n");
313		hexdump("UTF8->UTF16LE (old) input",
314		inbuf, inbytesleft);
315		errno = E2BIG;
316		return -1;
317		}
318
319		(outbuf)[outsize2] = (outbuf)[outsize2+1] = 0;
320		outsize += 2;
321		}
322
323		inbuf += inbytesleft;
324		*inbytesleft = 0;
325		outbuf += outsize2;
326		outbytesleft -= outsize2;
327
328		return 0;
329		}
330
331		size_t macosxfs_encoding_push(
332		void cd, / Encoder handle */
333		const char *inbuf, size_t inbytesleft, /* UTF-16-LE string */
334		char *outbuf, size_t outbytesleft) /* Script string */
335		{
336		static const int script_code = kCFStringEncodingUTF8;
337		static CFMutableStringRef cfstring = NULL;
338		static UniChar *buffer = NULL;
339		static size_t buflen = 0;
340		CFIndex outsize, cfsize, charsconverted;
341
342		(void) cd; /* UNUSED */
343
344		if (0 == *inbytesleft) {
345		return 0;
346		}
347
348		/*
349		* We need a buffer that can hold 4 times the original data,
350		* because that is the theoretical maximum that decomposition
351		* can create currently (in Unicode 4.0).
352		*/
353		buffer = set_ucbuffer_with_le_copy(
354		buffer, &buflen, inbuf, inbytesleft, 3 * *inbytesleft);
355
356		if (NULL == cfstring) {
357		cfstring = CFStringCreateMutableWithExternalCharactersNoCopy(
358		kCFAllocatorDefault,
359		buffer, *inbytesleft/2, buflen/2,
360		kCFAllocatorNull);
361		} else {
362		CFStringSetExternalCharactersNoCopy(
363		cfstring,
364		buffer, *inbytesleft/2, buflen/2);
365		}
366
367		/*
368		* Decompose characters, using the non-canonical decomposition
369		* form.
370		*
371		* NB: This isn't exactly what HFS+ wants (see note on
372		* kCFStringEncodingUseHFSPlusCanonical in
373		* CFStringEncodingConverter.h), but AFAIK it's the best that
374		* the official API can do.
375		*/
376		CFStringNormalize(cfstring, kCFStringNormalizationFormD);
377
378		cfsize = CFStringGetLength(cfstring);
379		charsconverted = CFStringGetBytes(
380		cfstring, CFRangeMake(0,cfsize),
381		script_code, 0, false,
382		(UInt8 )outbuf, outbytesleft, &outsize);
383
384		if (0 == charsconverted) {
385		debug_out("String conversion: "
386		"Buffer too small or not convertible\n");
387		hexdump("UTF16LE->UTF8 (old) input",
388		inbuf, inbytesleft);
389		errno = EILSEQ; /* Probably more likely. */
390		return -1;
391		}
392
393		/*
394		* Add a converted null byte, if the CFString conversions
395		* prevented that until now.
396		*/
397		if (0 == (inbuf)[inbytesleft-1] && 0 == (inbuf)[inbytesleft-2] &&
398		(0 != (*outbuf)[outsize-1])) {
399
400		if (((size_t)outsize+1) > *outbytesleft) {
401		debug_out("String conversion: "
402		"Output buffer too small\n");
403		hexdump("UTF16LE->UTF8 (old) input",
404		inbuf, inbytesleft);
405		errno = E2BIG;
406		return -1;
407		}
408
409		(*outbuf)[outsize] = 0;
410		++outsize;
411		}
412
413		inbuf += inbytesleft;
414		*inbytesleft = 0;
415		*outbuf += outsize;
416		*outbytesleft -= outsize;
417
418		return 0;
419		}
420
421		#else /* USE_INTERNAL_API */
422
423		/*
424		* An implementation based on internal code as known from the
425		* OpenDarwin CVS.
426		*
427		* This code doesn't need much memory management because it uses
428		* functions that operate on the raw memory directly.
429		*
430		* The push routine here is faster and more compatible with HFS+ than
431		* the other implementation above. The pull routine is only faster
432		* for some strings, slightly slower for others. The pull routine
433		* looses because it has to iterate over the data twice, once to
434		* decode UTF-8 and than to do the character composition required by
435		* Windows.
436		*/
437		static size_t macosxfs_encoding_pull(
438		void cd, / Encoder handle */
439		const char *inbuf, size_t inbytesleft, /* Script string */
440		char *outbuf, size_t outbytesleft) /* UTF-16-LE string */
441		{
442		static const int script_code = kCFStringEncodingUTF8;
443		UInt32 srcCharsUsed = 0;
444		UInt32 dstCharsUsed = 0;
445		UInt32 result;
446		uint32_t dstDecomposedUsed = 0;
447		uint32_t dstPrecomposedUsed = 0;
448
449		(void) cd; /* UNUSED */
450
451		if (0 == *inbytesleft) {
452		return 0;
453		}
454
455		result = CFStringEncodingBytesToUnicode(
456		script_code, kCFStringEncodingComposeCombinings,
457		inbuf, inbytesleft, &srcCharsUsed,
458		(UniChar)outbuf, *outbytesleft, &dstCharsUsed);
459
460		switch(result) {
461		case kCFStringEncodingConversionSuccess:
462		if (*inbytesleft == srcCharsUsed) {
463		break;
464		}
465
466		FALL_THROUGH;
467		case kCFStringEncodingInsufficientOutputBufferLength:
468		debug_out("String conversion: "
469		"Output buffer too small\n");
470		hexdump("UTF8->UTF16LE (new) input",
471		inbuf, inbytesleft);
472		errno = E2BIG;
473		return -1;
474		case kCFStringEncodingInvalidInputStream:
475		/*
476		* HACK: smbd/mangle_hash2.c:is_legal_name() expects
477		* errors here. That function will always pass 2
478		* characters. smbd/open.c:check_for_pipe() cuts a
479		* patchname to 10 characters blindly. Suppress the
480		* debug output in those cases.
481		*/
482		if(2 != inbytesleft && 10 != inbytesleft) {
483		debug_out("String conversion: "
484		"Invalid input sequence\n");
485		hexdump("UTF8->UTF16LE (new) input",
486		inbuf, inbytesleft);
487		}
488		errno = EILSEQ;
489		return -1;
490		case kCFStringEncodingConverterUnavailable:
491		debug_out("String conversion: "
492		"Unknown encoding\n");
493		hexdump("UTF8->UTF16LE (new) input",
494		inbuf, inbytesleft);
495		errno = EINVAL;
496		return -1;
497		}
498
499		/*
500		* It doesn't look like CFStringEncodingBytesToUnicode() can
501		* produce precomposed characters (flags=ComposeCombinings
502		* doesn't do it), so we need another pass over the data here.
503		* We can do this in-place, as the string can only get
504		* shorter.
505		*
506		* (Actually in theory there should be an internal
507		* decomposition and reordering before the actual composition
508		* step. But we should be able to rely on that we always get
509		* fully decomposed strings for input, so this can't create
510		* problems in reality.)
511		*/
512		CFUniCharPrecompose(
513		(const UTF16Char )outbuf, dstCharsUsed, &dstDecomposedUsed,
514		(UTF16Char )outbuf, dstCharsUsed, &dstPrecomposedUsed);
515
516		native_to_le(outbuf, dstPrecomposedUsed2);
517
518		*inbuf += srcCharsUsed;
519		*inbytesleft -= srcCharsUsed;
520		outbuf += dstPrecomposedUsed2;
521		outbytesleft -= dstPrecomposedUsed2;
522
523		return 0;
524		}
525
526		static size_t macosxfs_encoding_push(
527		void cd, / Encoder handle */
528		const char *inbuf, size_t inbytesleft, /* UTF-16-LE string */
529		char *outbuf, size_t outbytesleft) /* Script string */
530		{
531		static const int script_code = kCFStringEncodingUTF8;
532		static UniChar *buffer = NULL;
533		static size_t buflen = 0;
534		UInt32 srcCharsUsed=0, dstCharsUsed=0, result;
535
536		(void) cd; /* UNUSED */
537
538		if (0 == *inbytesleft) {
539		return 0;
540		}
541
542		buffer = set_ucbuffer_with_le(
543		buffer, &buflen, inbuf, inbytesleft);
544
545		result = CFStringEncodingUnicodeToBytes(
546		script_code, kCFStringEncodingUseHFSPlusCanonical,
547		buffer, *inbytesleft/2, &srcCharsUsed,
548		outbuf, outbytesleft, &dstCharsUsed);
549
550		switch(result) {
551		case kCFStringEncodingConversionSuccess:
552		if (*inbytesleft/2 == srcCharsUsed) {
553		break;
554		}
555
556		FALL_THROUGH;
557		case kCFStringEncodingInsufficientOutputBufferLength:
558		debug_out("String conversion: "
559		"Output buffer too small\n");
560		hexdump("UTF16LE->UTF8 (new) input",
561		inbuf, inbytesleft);
562		errno = E2BIG;
563		return -1;
564		case kCFStringEncodingInvalidInputStream:
565		/*
566		* HACK: smbd/open.c:check_for_pipe():is_legal_name()
567		* cuts a pathname to 10 characters blindly. Suppress
568		* the debug output in those cases.
569		*/
570		if(10 != *inbytesleft) {
571		debug_out("String conversion: "
572		"Invalid input sequence\n");
573		hexdump("UTF16LE->UTF8 (new) input",
574		inbuf, inbytesleft);
575		}
576		errno = EILSEQ;
577		return -1;
578		case kCFStringEncodingConverterUnavailable:
579		debug_out("String conversion: "
580		"Unknown encoding\n");
581		hexdump("UTF16LE->UTF8 (new) input",
582		inbuf, inbytesleft);
583		errno = EINVAL;
584		return -1;
585		}
586
587		inbuf += srcCharsUsed2;
588		inbytesleft -= srcCharsUsed2;
589		*outbuf += dstCharsUsed;
590		*outbytesleft -= dstCharsUsed;
591
592		return 0;
593		}
594
595		#endif /* USE_INTERNAL_API */
596
597		#else /* DARWIN */
598
599		void charset_macosfs_dummy(void);
600		void charset_macosfs_dummy(void)
601	0	{
602	0	return;
603	0	}
604
605		#endif /* DARWIN */