/work/obj-fuzz/dist/include/js/CharacterEncoding.h

Source (jump to first uncovered line)
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sts=4 et sw=4 tw=99:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef js_CharacterEncoding_h
#define js_CharacterEncoding_h

#include "mozilla/Range.h"

#include "js/TypeDecls.h"
#include "js/Utility.h"

class JSFlatString;

namespace JS {

/*
 * C/C++ strings that use one byte per character are, by default, interpreted
 * by the JSAPI as ISO/IEC 8859-1 (Latin-1): each byte is treated as one
 * 2-byte character, so a string containing characters beyond U+00FF cannot
 * be passed in this way.
 *
 * Latin1Chars is a mozilla::Range over Latin1Char, built from a pointer and
 * a length.
 */
class Latin1Chars : public mozilla::Range<Latin1Char>
{
    using Base = mozilla::Range<Latin1Char>;

  public:
    using CharT = Latin1Char;

    // Default-constructs the underlying range.
    Latin1Chars()
      : Base()
    {}

    // Views |aLength| chars starting at |aBytes| as Latin1Char.
    Latin1Chars(char* aBytes, size_t aLength)
      : Base(reinterpret_cast<Latin1Char*>(aBytes), aLength)
    {}

    // The two const-qualified overloads below cast constness away because
    // Base is a range of non-const Latin1Char.
    Latin1Chars(const Latin1Char* aBytes, size_t aLength)
      : Base(const_cast<Latin1Char*>(aBytes), aLength)
    {}

    Latin1Chars(const char* aBytes, size_t aLength)
      : Base(reinterpret_cast<Latin1Char*>(const_cast<char*>(aBytes)), aLength)
    {}
};

/*
 * The \0-terminated counterpart of Latin1Chars, for C compatibility.
 *
 * Both pointer-taking constructors assert (MOZ_ASSERT) that the element at
 * index |aLength| is the terminating '\0'.
 */
class Latin1CharsZ : public mozilla::RangedPtr<Latin1Char>
{
    using Base = mozilla::RangedPtr<Latin1Char>;

  public:
    using CharT = Latin1Char;

    // Null pointer with a zero-length range.
    Latin1CharsZ()
      : Base(nullptr, 0)
    {}

    Latin1CharsZ(char* aBytes, size_t aLength)
      : Base(reinterpret_cast<Latin1Char*>(aBytes), aLength)
    {
        MOZ_ASSERT(aBytes[aLength] == '\0');
    }

    Latin1CharsZ(Latin1Char* aBytes, size_t aLength)
      : Base(aBytes, aLength)
    {
        MOZ_ASSERT(aBytes[aLength] == '\0');
    }

    using Base::operator=;

    // The wrapped pointer, reinterpreted as a plain char*.
    char* c_str()
    {
        Latin1Char* raw = get();
        return reinterpret_cast<char*>(raw);
    }
};

/*
 * A mozilla::Range over unsigned char, built from a char pointer and a
 * length. Going by the name, the bytes are meant to be UTF-8 encoded; nothing
 * in this class checks the encoding.
 */
class UTF8Chars : public mozilla::Range<unsigned char>
{
    using Base = mozilla::Range<unsigned char>;

  public:
    using CharT = unsigned char;

    // Default-constructs the underlying range.
    UTF8Chars()
      : Base()
    {}

    UTF8Chars(char* aBytes, size_t aLength)
      : Base(reinterpret_cast<unsigned char*>(aBytes), aLength)
    {}

    // Constness is cast away because Base is a range of non-const
    // unsigned char.
    UTF8Chars(const char* aBytes, size_t aLength)
      : Base(reinterpret_cast<unsigned char*>(const_cast<char*>(aBytes)), aLength)
    {}
};

/*
 * In some places SpiderMonkey also works directly with UTF-8 encoded text.
 *
 * UTF8CharsZ is a mozilla::RangedPtr over unsigned char. Both pointer-taking
 * constructors assert (MOZ_ASSERT) that the element at index |aLength| is
 * '\0'.
 */
class UTF8CharsZ : public mozilla::RangedPtr<unsigned char>
{
    using Base = mozilla::RangedPtr<unsigned char>;

  public:
    using CharT = unsigned char;

    // Null pointer with a zero-length range.
    UTF8CharsZ()
      : Base(nullptr, 0)
    {}

    UTF8CharsZ(char* aBytes, size_t aLength)
      : Base(reinterpret_cast<unsigned char*>(aBytes), aLength)
    {
        MOZ_ASSERT(aBytes[aLength] == '\0');
    }

    UTF8CharsZ(unsigned char* aBytes, size_t aLength)
      : Base(aBytes, aLength)
    {
        MOZ_ASSERT(aBytes[aLength] == '\0');
    }

    using Base::operator=;

    // The wrapped pointer, reinterpreted as a plain char*.
    char* c_str()
    {
        unsigned char* raw = get();
        return reinterpret_cast<char*>(raw);
    }
};

/*
 * Wraps a "const char*" whose contents are UTF-8 encoded. Ownership of the
 * data is not managed here; it stays with whoever supplied the pointer.
 * Compared with UTF8CharsZ, the chars are const and assignment is not
 * offered.
 *
 * Note that CharT is unsigned char even though the stored pointer is a
 * const char*.
 */
class JS_PUBLIC_API(ConstUTF8CharsZ)
{
    const char* data_;

  public:
    using CharT = unsigned char;

    // Wraps a null pointer; operator bool() reports false for it.
    ConstUTF8CharsZ()
      : data_(nullptr)
    {}

    // Wraps |aBytes|. Asserts that aBytes[aLength] is the terminating '\0'
    // and, when DEBUG is defined, additionally calls validate(aLength).
    ConstUTF8CharsZ(const char* aBytes, size_t aLength)
      : data_(aBytes)
    {
        MOZ_ASSERT(aBytes[aLength] == '\0');
#ifdef DEBUG
        validate(aLength);
#endif
    }

    // The wrapped pointer as an untyped const pointer.
    const void* get() const
    {
        return data_;
    }

    // The wrapped pointer as a C string.
    const char* c_str() const
    {
        return data_;
    }

    // True when a non-null pointer is wrapped.
    explicit operator bool() const
    {
        return data_ != nullptr;
    }

  private:
#ifdef DEBUG
    // Defined out of line; only declared when DEBUG is defined.
    void validate(size_t aLength);
#endif
};

/*
 * SpiderMonkey represents characters in 2 bytes, as a 2-byte-at-a-time view
 * of a UTF-16 byte stream. That resembles UCS-2, except that UTF-16 extension
 * bytes are not stripped, so a sufficiently dedicated JavaScript program can
 * be fully Unicode-aware by manually interpreting the UTF-16 extension
 * characters embedded in the JS string.
 *
 * TwoByteChars is a mozilla::Range over char16_t.
 */
class TwoByteChars : public mozilla::Range<char16_t>
{
    using Base = mozilla::Range<char16_t>;

  public:
    using CharT = char16_t;

    // Default-constructs the underlying range.
    TwoByteChars()
      : Base()
    {}

    TwoByteChars(char16_t* aChars, size_t aLength)
      : Base(aChars, aLength)
    {}

    // Constness is cast away because Base is a range of non-const char16_t.
    TwoByteChars(const char16_t* aChars, size_t aLength)
      : Base(const_cast<char16_t*>(aChars), aLength)
    {}
};

/*
 * The \0-terminated counterpart of TwoByteChars, for compatibility with
 * JSFlatString.
 *
 * The pointer-taking constructor asserts (MOZ_ASSERT) that the element at
 * index |length| is '\0'.
 */
class TwoByteCharsZ : public mozilla::RangedPtr<char16_t>
{
    using Base = mozilla::RangedPtr<char16_t>;

  public:
    using CharT = char16_t;

    // Null pointer with a zero-length range.
    TwoByteCharsZ()
      : Base(nullptr, 0)
    {}

    TwoByteCharsZ(char16_t* chars, size_t length)
      : Base(chars, length)
    {
        MOZ_ASSERT(chars[length] == '\0');
    }

    using Base::operator=;
};

// Shorthand for a ranged pointer to const char16_t.
using ConstCharPtr = mozilla::RangedPtr<const char16_t>;

/*
 * The const-element counterpart of TwoByteChars: a mozilla::Range over
 * const char16_t, so no const_cast is needed when constructing it from a
 * const pointer.
 */
class ConstTwoByteChars : public mozilla::Range<const char16_t>
{
    using Base = mozilla::Range<const char16_t>;

  public:
    using CharT = char16_t;

    // Default-constructs the underlying range.
    ConstTwoByteChars()
      : Base()
    {}

    ConstTwoByteChars(const char16_t* aChars, size_t aLength)
      : Base(aChars, aLength)
    {}
};

/*
 * Convert a 2-byte character sequence to "ISO-Latin-1" by truncating each
 * 2-byte character in the sequence to 1 byte. If the source contains any
 * UTF-16 extension characters, the output may be invalid Latin1.
 *
 * The returned string is zero terminated. The returned string or the
 * returned string's |start()| must be freed with JS_free or js_free,
 * respectively.
 *
 * If allocation fails, an OOM error will be set and the method will return a
 * nullptr chars (which can be tested for with the ! operator).
 *
 * This method cannot trigger GC.
 */
extern Latin1CharsZ
LossyTwoByteCharsToNewLatin1CharsZ(JSContext* cx,
                                   const mozilla::Range<const char16_t> tbchars);

/*
 * Convenience overload: wraps |begin| and |length| in a
 * mozilla::Range<const char16_t> and forwards to the Range-taking
 * JS::LossyTwoByteCharsToNewLatin1CharsZ.
 */
inline Latin1CharsZ
LossyTwoByteCharsToNewLatin1CharsZ(JSContext* cx, const char16_t* begin, size_t length)
{
    return JS::LossyTwoByteCharsToNewLatin1CharsZ(
        cx, mozilla::Range<const char16_t>(begin, length));
}

/*
 * Produce a UTF8CharsZ from a range of |CharT| characters.
 *
 * Only the template declaration is visible here; no definition appears in
 * this header.
 *
 * NOTE(review): the parameter name |maybeCx| suggests a null context is
 * accepted, and the "New" in the name suggests the result is freshly
 * allocated -- confirm both against the definition.
 */
template <typename CharT>
extern UTF8CharsZ
CharsToNewUTF8CharsZ(JSContext* maybeCx, const mozilla::Range<CharT> chars);

/*
 * NOTE(review): judging by the name and signature, this decodes a single
 * UCS-4 code point from the |utf8Length| bytes at |utf8Buffer| and returns
 * it. How malformed or truncated input is handled is not visible in this
 * header -- confirm against the definition.
 */
JS_PUBLIC_API(uint32_t)
Utf8ToOneUcs4Char(const uint8_t* utf8Buffer, int utf8Length);

/*
 * Inflate bytes in UTF-8 encoding to char16_t.
 * - On error, returns an empty TwoByteCharsZ.
 * - On success, returns a malloc'd TwoByteCharsZ, and updates |outlen| to hold
 *   its length; the length value excludes the trailing null.
 *
 * |utf8| is taken by value as a UTF8Chars range.
 */
extern JS_PUBLIC_API(TwoByteCharsZ)
UTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen);

/*
 * Like the UTF8Chars overload of UTF8CharsToNewTwoByteCharsZ, but the input
 * is a ConstUTF8CharsZ passed by const reference. That type carries no
 * explicit length, only a pointer to null-terminated data.
 */
extern JS_PUBLIC_API(TwoByteCharsZ)
UTF8CharsToNewTwoByteCharsZ(JSContext* cx, const ConstUTF8CharsZ& utf8, size_t* outlen);

/*
 * The same as UTF8CharsToNewTwoByteCharsZ(), except that any malformed UTF-8
 * characters will be replaced by \uFFFD. No exception will be thrown for
 * malformed UTF-8 input.
 */
extern JS_PUBLIC_API(TwoByteCharsZ)
LossyUTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen);

/*
 * Overload of LossyUTF8CharsToNewTwoByteCharsZ whose input is a
 * ConstUTF8CharsZ passed by const reference.
 */
extern JS_PUBLIC_API(TwoByteCharsZ)
LossyUTF8CharsToNewTwoByteCharsZ(JSContext* cx, const ConstUTF8CharsZ& utf8, size_t* outlen);

/*
 * Returns the length, in chars, of the buffer required to encode |s| as
 * UTF8. The returned length does not include the null-terminator.
 */
JS_PUBLIC_API(size_t)
GetDeflatedUTF8StringLength(JSFlatString* s);

/*
 * Encode |src| as UTF8 into |dst|. Does not write the null terminator.
 *
 * The caller must do one of the following:
 * - ensure |dst| has enough space to encode the entire string, or
 * - pass the length of the buffer as |dstlenp|, in which case the function
 *   will encode characters from the string until the buffer is exhausted.
 *
 * If |dstlenp| is provided, it will be updated to hold the number of bytes
 * written to the buffer. If |numcharsp| is provided, it will be updated to hold
 * the number of Unicode characters written to the buffer (which can be less
 * than the length of the string, if the buffer is exhausted before the string
 * is fully encoded).
 *
 * Both |dstlenp| and |numcharsp| default to nullptr.
 */
JS_PUBLIC_API(void)
DeflateStringToUTF8Buffer(JSFlatString* src, mozilla::RangedPtr<char> dst,
                          size_t* dstlenp = nullptr, size_t* numcharsp = nullptr);

/*
 * Names the narrowest character encoding that is able to represent a
 * particular string in full. The enumerators are listed from narrowest to
 * widest.
 */
enum class SmallestEncoding
{
    ASCII,
    Latin1,
    UTF16,
};

/*
 * Returns the smallest encoding possible for the given string:
 * - ASCII if all codepoints are <128,
 * - otherwise Latin1 if all codepoints are <256,
 * - otherwise UTF16.
 */
JS_PUBLIC_API(SmallestEncoding)
FindSmallestEncoding(UTF8Chars utf8);

/*
 * Return a null-terminated Latin-1 string copied from the input string,
 * storing its length (excluding null terminator) in |*outlen|.  Fail and
 * report an error if the string contains non-Latin-1 codepoints.  Returns
 * Latin1CharsZ() on failure.
 */
extern JS_PUBLIC_API(Latin1CharsZ)
UTF8CharsToNewLatin1CharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen);

/*
 * Return a null-terminated Latin-1 string copied from the input string,
 * storing its length (excluding null terminator) in |*outlen|.  Unlike
 * UTF8CharsToNewLatin1CharsZ, non-Latin-1 codepoints do not cause failure:
 * they are replaced by '?'.  Returns Latin1CharsZ() on failure.
 */
extern JS_PUBLIC_API(Latin1CharsZ)
LossyUTF8CharsToNewLatin1CharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen);

/*
 * Returns true if every character in the given null-terminated string |s| is
 * ASCII, i.e. < 0x80, and false otherwise.
 */
extern JS_PUBLIC_API(bool)
StringIsASCII(const char* s);

} // namespace JS

// Hands the buffer that |ptr| points at to js_free. |ptr| itself is not
// modified.
inline void
JS_free(JS::Latin1CharsZ& ptr)
{
    js_free(static_cast<void*>(ptr.get()));
}
// Hands the buffer that |ptr| points at to js_free. |ptr| itself is not
// modified.
inline void
JS_free(JS::UTF8CharsZ& ptr)
{
    js_free(static_cast<void*>(ptr.get()));
}

/**
 * DEPRECATED
 *
 * Allocate memory sufficient to contain the characters of |str| truncated to
 * Latin-1 and a trailing null terminator, fill the memory with the characters
 * interpreted in that manner plus the null terminator, and return a pointer to
 * the memory (as a JS::UniqueChars).
 *
 * This function *loses information* when it copies the characters of |str| if
 * |str| contains code units greater than 0xFF.  Additionally, users that
 * depend on null-termination will misinterpret the copied characters if |str|
 * contains any nulls.  Avoid using this function if possible, because it will
 * eventually be removed.
 *
 * NOTE(review): the result on failure is not visible in this header --
 * confirm against the definition.
 */
extern JS_PUBLIC_API(JS::UniqueChars)
JS_EncodeStringToLatin1(JSContext* cx, JSString* str);

/**
 * DEPRECATED
 *
 * Same behavior as JS_EncodeStringToLatin1(), but encode into a UTF-8 string.
 * Unlike that function, |str| is passed as a JS::Handle<JSString*>.
 *
 * This function *loses information* when it copies the characters of |str| if
 * |str| contains invalid UTF-16: U+FFFD REPLACEMENT CHARACTER will be copied
 * instead.
 *
 * The returned string is also subject to misinterpretation if |str| contains
 * any nulls (which are faithfully transcribed into the returned string, but
 * which will implicitly truncate the string if it's passed to functions that
 * expect null-terminated strings).
 *
 * Avoid using this function if possible, because we'll remove it once we can
 * devise a better API for the task.
 */
extern JS_PUBLIC_API(JS::UniqueChars)
JS_EncodeStringToUTF8(JSContext* cx, JS::Handle<JSString*> str);

/**
 * DEPRECATED
 *
 * Same behavior as JS_EncodeStringToLatin1(), but encode into an ASCII string.
 *
 * This function asserts in debug mode that the input string contains only
 * ASCII characters.
 *
 * NOTE(review): what happens to non-ASCII input when the assertion is not
 * active is not stated in this header -- confirm against the definition.
 *
 * The returned string is also subject to misinterpretation if |str| contains
 * any nulls (which are faithfully transcribed into the returned string, but
 * which will implicitly truncate the string if it's passed to functions that
 * expect null-terminated strings).
 *
 * Avoid using this function if possible, because we'll remove it once we can
 * devise a better API for the task.
 */
extern JS_PUBLIC_API(JS::UniqueChars)
JS_EncodeStringToASCII(JSContext* cx, JSString* str);

#endif /* js_CharacterEncoding_h */

Coverage Report

Created: 2018-09-25 14:53

Line	Count	Source (jump to first uncovered line)
1		/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2		* vim: set ts=8 sts=4 et sw=4 tw=99:
3		* This Source Code Form is subject to the terms of the Mozilla Public
4		* License, v. 2.0. If a copy of the MPL was not distributed with this
5		* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7		#ifndef js_CharacterEncoding_h
8		#define js_CharacterEncoding_h
9
10		#include "mozilla/Range.h"
11
12		#include "js/TypeDecls.h"
13		#include "js/Utility.h"
14
15		class JSFlatString;
16
17		namespace JS {
18
19		/*
20		* By default, all C/C++ 1-byte-per-character strings passed into the JSAPI
21		* are treated as ISO/IEC 8859-1, also known as Latin-1. That is, each
22		* byte is treated as a 2-byte character, and there is no way to pass in a
23		* string containing characters beyond U+00FF.
24		*/
25		class Latin1Chars : public mozilla::Range<Latin1Char>
26		{
27		typedef mozilla::Range<Latin1Char> Base;
28
29		public:
30		using CharT = Latin1Char;
31
32	0	Latin1Chars() : Base() {}
33	0	Latin1Chars(char* aBytes, size_t aLength) : Base(reinterpret_cast<Latin1Char*>(aBytes), aLength) {}
34		Latin1Chars(const Latin1Char* aBytes, size_t aLength)
35		: Base(const_cast<Latin1Char*>(aBytes), aLength)
36	0	{}
37		Latin1Chars(const char* aBytes, size_t aLength)
38		: Base(reinterpret_cast<Latin1Char*>(const_cast<char*>(aBytes)), aLength)
39	0	{}
40		};
41
42		/*
43		* A Latin1Chars, but with \0 termination for C compatibility.
44		*/
45		class Latin1CharsZ : public mozilla::RangedPtr<Latin1Char>
46		{
47		typedef mozilla::RangedPtr<Latin1Char> Base;
48
49		public:
50		using CharT = Latin1Char;
51
52	0	Latin1CharsZ() : Base(nullptr, 0) {}
53
54		Latin1CharsZ(char* aBytes, size_t aLength)
55		: Base(reinterpret_cast<Latin1Char*>(aBytes), aLength)
56	0	{
57	0	MOZ_ASSERT(aBytes[aLength] == '\0');
58	0	}
59
60		Latin1CharsZ(Latin1Char* aBytes, size_t aLength)
61		: Base(aBytes, aLength)
62	0	{
63	0	MOZ_ASSERT(aBytes[aLength] == '\0');
64	0	}
65
66		using Base::operator=;
67
68	0	char* c_str() { return reinterpret_cast<char*>(get()); }
69		};
70
71		class UTF8Chars : public mozilla::Range<unsigned char>
72		{
73		typedef mozilla::Range<unsigned char> Base;
74
75		public:
76		using CharT = unsigned char;
77
78	0	UTF8Chars() : Base() {}
79		UTF8Chars(char* aBytes, size_t aLength)
80		: Base(reinterpret_cast<unsigned char*>(aBytes), aLength)
81	0	{}
82		UTF8Chars(const char* aBytes, size_t aLength)
83		: Base(reinterpret_cast<unsigned char*>(const_cast<char*>(aBytes)), aLength)
84	3	{}
85		};
86
87		/*
88		* SpiderMonkey also deals directly with UTF-8 encoded text in some places.
89		*/
90		class UTF8CharsZ : public mozilla::RangedPtr<unsigned char>
91		{
92		typedef mozilla::RangedPtr<unsigned char> Base;
93
94		public:
95		using CharT = unsigned char;
96
97	0	UTF8CharsZ() : Base(nullptr, 0) {}
98
99		UTF8CharsZ(char* aBytes, size_t aLength)
100		: Base(reinterpret_cast<unsigned char*>(aBytes), aLength)
101	14	{
102	14	MOZ_ASSERT(aBytes[aLength] == '\0');
103	14	}
104
105		UTF8CharsZ(unsigned char* aBytes, size_t aLength)
106		: Base(aBytes, aLength)
107	0	{
108	0	MOZ_ASSERT(aBytes[aLength] == '\0');
109	0	}
110
111		using Base::operator=;
112
113	14	char* c_str() { return reinterpret_cast<char*>(get()); }
114		};
115
116		/*
117		* A wrapper for a "const char*" that is encoded using UTF-8.
118		* This class does not manage ownership of the data; that is left
119		* to others. This differs from UTF8CharsZ in that the chars are
120		* const and it disallows assignment.
121		*/
122		class JS_PUBLIC_API(ConstUTF8CharsZ)
123		{
124		const char* data_;
125
126		public:
127		using CharT = unsigned char;
128
129		ConstUTF8CharsZ() : data_(nullptr)
130	0	{}
131
132		ConstUTF8CharsZ(const char* aBytes, size_t aLength)
133		: data_(aBytes)
134	0	{
135	0	MOZ_ASSERT(aBytes[aLength] == '\0');
136		#ifdef DEBUG
137		validate(aLength);
138		#endif
139		}
140
141	0	const void* get() const { return data_; }
142
143	0	const char* c_str() const { return data_; }
144
145	0	explicit operator bool() const { return data_ != nullptr; }
146
147		private:
148		#ifdef DEBUG
149		void validate(size_t aLength);
150		#endif
151		};
152
153		/*
154		* SpiderMonkey uses a 2-byte character representation: it is a
155		* 2-byte-at-a-time view of a UTF-16 byte stream. This is similar to UCS-2,
156		* but unlike UCS-2, we do not strip UTF-16 extension bytes. This allows a
157		* sufficiently dedicated JavaScript program to be fully unicode-aware by
158		* manually interpreting UTF-16 extension characters embedded in the JS
159		* string.
160		*/
161		class TwoByteChars : public mozilla::Range<char16_t>
162		{
163		typedef mozilla::Range<char16_t> Base;
164
165		public:
166		using CharT = char16_t;
167
168	0	TwoByteChars() : Base() {}
169	0	TwoByteChars(char16_t* aChars, size_t aLength) : Base(aChars, aLength) {}
170	0	TwoByteChars(const char16_t* aChars, size_t aLength) : Base(const_cast<char16_t*>(aChars), aLength) {}
171		};
172
173		/*
174		* A TwoByteChars, but \0 terminated for compatibility with JSFlatString.
175		*/
176		class TwoByteCharsZ : public mozilla::RangedPtr<char16_t>
177		{
178		typedef mozilla::RangedPtr<char16_t> Base;
179
180		public:
181		using CharT = char16_t;
182
183	0	TwoByteCharsZ() : Base(nullptr, 0) {}
184
185		TwoByteCharsZ(char16_t* chars, size_t length)
186		: Base(chars, length)
187	3	{
188	3	MOZ_ASSERT(chars[length] == '\0');
189	3	}
190
191		using Base::operator=;
192		};
193
194		typedef mozilla::RangedPtr<const char16_t> ConstCharPtr;
195
196		/*
197		* Like TwoByteChars, but the chars are const.
198		*/
199		class ConstTwoByteChars : public mozilla::Range<const char16_t>
200		{
201		typedef mozilla::Range<const char16_t> Base;
202
203		public:
204		using CharT = char16_t;
205
206	0	ConstTwoByteChars() : Base() {}
207	0	ConstTwoByteChars(const char16_t* aChars, size_t aLength) : Base(aChars, aLength) {}
208		};
209
210		/*
211		* Convert a 2-byte character sequence to "ISO-Latin-1". This works by
212		* truncating each 2-byte pair in the sequence to a 1-byte pair. If the source
213		* contains any UTF-16 extension characters, then this may give invalid Latin1
214		* output. The returned string is zero terminated. The returned string or the
215		* returned string's \|start()\| must be freed with JS_free or js_free,
216		* respectively. If allocation fails, an OOM error will be set and the method
217		* will return a nullptr chars (which can be tested for with the ! operator).
218		* This method cannot trigger GC.
219		*/
220		extern Latin1CharsZ
221		LossyTwoByteCharsToNewLatin1CharsZ(JSContext* cx,
222		const mozilla::Range<const char16_t> tbchars);
223
224		inline Latin1CharsZ
225		LossyTwoByteCharsToNewLatin1CharsZ(JSContext* cx, const char16_t* begin, size_t length)
226	0	{
227	0	const mozilla::Range<const char16_t> tbchars(begin, length);
228	0	return JS::LossyTwoByteCharsToNewLatin1CharsZ(cx, tbchars);
229	0	}
230
231		template <typename CharT>
232		extern UTF8CharsZ
233		CharsToNewUTF8CharsZ(JSContext* maybeCx, const mozilla::Range<CharT> chars);
234
235		JS_PUBLIC_API(uint32_t)
236		Utf8ToOneUcs4Char(const uint8_t* utf8Buffer, int utf8Length);
237
238		/*
239		* Inflate bytes in UTF-8 encoding to char16_t.
240		* - On error, returns an empty TwoByteCharsZ.
241		* - On success, returns a malloc'd TwoByteCharsZ, and updates \|outlen\| to hold
242		* its length; the length value excludes the trailing null.
243		*/
244		extern JS_PUBLIC_API(TwoByteCharsZ)
245		UTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen);
246
247		/*
248		* Like UTF8CharsToNewTwoByteCharsZ, but for ConstUTF8CharsZ.
249		*/
250		extern JS_PUBLIC_API(TwoByteCharsZ)
251		UTF8CharsToNewTwoByteCharsZ(JSContext* cx, const ConstUTF8CharsZ& utf8, size_t* outlen);
252
253		/*
254		* The same as UTF8CharsToNewTwoByteCharsZ(), except that any malformed UTF-8 characters
255		* will be replaced by \uFFFD. No exception will be thrown for malformed UTF-8
256		* input.
257		*/
258		extern JS_PUBLIC_API(TwoByteCharsZ)
259		LossyUTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen);
260
261		extern JS_PUBLIC_API(TwoByteCharsZ)
262		LossyUTF8CharsToNewTwoByteCharsZ(JSContext* cx, const ConstUTF8CharsZ& utf8, size_t* outlen);
263
264		/*
265		* Returns the length of the char buffer required to encode \|s\| as UTF8.
266		* Does not include the null-terminator.
267		*/
268		JS_PUBLIC_API(size_t)
269		GetDeflatedUTF8StringLength(JSFlatString* s);
270
271		/*
272		* Encode \|src\| as UTF8. The caller must either ensure \|dst\| has enough space
273		* to encode the entire string or pass the length of the buffer as \|dstlenp\|,
274		* in which case the function will encode characters from the string until
275		* the buffer is exhausted. Does not write the null terminator.
276		*
277		* If \|dstlenp\| is provided, it will be updated to hold the number of bytes
278		* written to the buffer. If \|numcharsp\| is provided, it will be updated to hold
279		* the number of Unicode characters written to the buffer (which can be less
280		* than the length of the string, if the buffer is exhausted before the string
281		* is fully encoded).
282		*/
283		JS_PUBLIC_API(void)
284		DeflateStringToUTF8Buffer(JSFlatString* src, mozilla::RangedPtr<char> dst,
285		size_t* dstlenp = nullptr, size_t* numcharsp = nullptr);
286
287		/*
288		* The smallest character encoding capable of fully representing a particular
289		* string.
290		*/
291		enum class SmallestEncoding {
292		ASCII,
293		Latin1,
294		UTF16
295		};
296
297		/*
298		* Returns the smallest encoding possible for the given string: if all
299		* codepoints are <128 then ASCII, otherwise if all codepoints are <256
300		* Latin-1, else UTF16.
301		*/
302		JS_PUBLIC_API(SmallestEncoding)
303		FindSmallestEncoding(UTF8Chars utf8);
304
305		/*
306		* Return a null-terminated Latin-1 string copied from the input string,
307		* storing its length (excluding null terminator) in \|*outlen\|. Fail and
308		* report an error if the string contains non-Latin-1 codepoints. Returns
309		* Latin1CharsZ() on failure.
310		*/
311		extern JS_PUBLIC_API(Latin1CharsZ)
312		UTF8CharsToNewLatin1CharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen);
313
314		/*
315		* Return a null-terminated Latin-1 string copied from the input string,
316		* storing its length (excluding null terminator) in \|*outlen\|. Non-Latin-1
317		* codepoints are replaced by '?'. Returns Latin1CharsZ() on failure.
318		*/
319		extern JS_PUBLIC_API(Latin1CharsZ)
320		LossyUTF8CharsToNewLatin1CharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen);
321
322		/*
323		* Returns true if all characters in the given null-terminated string are
324		* ASCII, i.e. < 0x80, false otherwise.
325		*/
326		extern JS_PUBLIC_API(bool)
327		StringIsASCII(const char* s);
328
329		} // namespace JS
330
331	0	inline void JS_free(JS::Latin1CharsZ& ptr) { js_free((void*)ptr.get()); }
332	0	inline void JS_free(JS::UTF8CharsZ& ptr) { js_free((void*)ptr.get()); }
333
334		/**
335		* DEPRECATED
336		*
337		* Allocate memory sufficient to contain the characters of \|str\| truncated to
338		* Latin-1 and a trailing null terminator, fill the memory with the characters
339		* interpreted in that manner plus the null terminator, and return a pointer to
340		* the memory.
341		*
342		* This function loses information when it copies the characters of \|str\| if
343		* \|str\| contains code units greater than 0xFF. Additionally, users that
344		* depend on null-termination will misinterpret the copied characters if \|str\|
345		* contains any nulls. Avoid using this function if possible, because it will
346		* eventually be removed.
347		*/
348		extern JS_PUBLIC_API(JS::UniqueChars)
349		JS_EncodeStringToLatin1(JSContext* cx, JSString* str);
350
351		/**
352		* DEPRECATED
353		*
354		* Same behavior as JS_EncodeStringToLatin1(), but encode into a UTF-8 string.
355		*
356		* This function loses information when it copies the characters of \|str\| if
357		* \|str\| contains invalid UTF-16: U+FFFD REPLACEMENT CHARACTER will be copied
358		* instead.
359		*
360		* The returned string is also subject to misinterpretation if \|str\| contains
361		* any nulls (which are faithfully transcribed into the returned string, but
362		* which will implicitly truncate the string if it's passed to functions that
363		* expect null-terminated strings).
364		*
365		* Avoid using this function if possible, because we'll remove it once we can
366		* devise a better API for the task.
367		*/
368		extern JS_PUBLIC_API(JS::UniqueChars)
369		JS_EncodeStringToUTF8(JSContext* cx, JS::Handle<JSString*> str);
370
371		/**
372		* DEPRECATED
373		*
374		* Same behavior as JS_EncodeStringToLatin1(), but encode into an ASCII string.
375		*
376		* This function asserts in debug mode that the input string contains only
377		* ASCII characters.
378		*
379		* The returned string is also subject to misinterpretation if \|str\| contains
380		* any nulls (which are faithfully transcribed into the returned string, but
381		* which will implicitly truncate the string if it's passed to functions that
382		* expect null-terminated strings).
383		*
384		* Avoid using this function if possible, because we'll remove it once we can
385		* devise a better API for the task.
386		*/
387		extern JS_PUBLIC_API(JS::UniqueChars)
388		JS_EncodeStringToASCII(JSContext* cx, JSString* str);
389
390		#endif /* js_CharacterEncoding_h */