Coverage Report

Created: 2025-09-05 10:05

/src/node/src/node_i18n.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright Joyent, Inc. and other Node contributors.
2
//
3
// Permission is hereby granted, free of charge, to any person obtaining a
4
// copy of this software and associated documentation files (the
5
// "Software"), to deal in the Software without restriction, including
6
// without limitation the rights to use, copy, modify, merge, publish,
7
// distribute, sublicense, and/or sell copies of the Software, and to permit
8
// persons to whom the Software is furnished to do so, subject to the
9
// following conditions:
10
//
11
// The above copyright notice and this permission notice shall be included
12
// in all copies or substantial portions of the Software.
13
//
14
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
17
// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
18
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20
// USE OR OTHER DEALINGS IN THE SOFTWARE.
21
22
/*
23
 * notes: by srl295
24
 *  - When in NODE_HAVE_SMALL_ICU mode, ICU is linked against "stub" (null) data
25
 *     ( stubdata/libicudata.a ) containing nothing, no data, and it's also
26
 *    linked against a "small" data file which the SMALL_ICUDATA_ENTRY_POINT
27
 *    macro names. That's the "english+root" data.
28
 *
29
 *    If icu_data_path is non-null, the user has provided a path and we assume
30
 *    it goes somewhere useful. We set that path in ICU, and exit.
31
 *    If icu_data_path is null, they haven't set a path and we want the
32
 *    "english+root" data.  We call
33
 *       udata_setCommonData(SMALL_ICUDATA_ENTRY_POINT,...)
34
 *    to load up the english+root data.
35
 *
36
 *  - when NOT in NODE_HAVE_SMALL_ICU mode, ICU is linked directly with its full
37
 *    data. All of the variables and command line options for changing data at
38
 *    runtime are disabled, as they wouldn't fully override the internal data.
39
 *    See:  http://bugs.icu-project.org/trac/ticket/10924
40
 */
41
42
43
#include "node_i18n.h"
44
#include "node_external_reference.h"
45
46
#if defined(NODE_HAVE_I18N_SUPPORT)
47
48
#include "base_object-inl.h"
49
#include "node.h"
50
#include "node_buffer.h"
51
#include "node_errors.h"
52
#include "node_internals.h"
53
#include "string_bytes.h"
54
#include "util-inl.h"
55
#include "v8.h"
56
57
#include <unicode/putil.h>
58
#include <unicode/timezone.h>
59
#include <unicode/uchar.h>
60
#include <unicode/uclean.h>
61
#include <unicode/ucnv.h>
62
#include <unicode/udata.h>
63
#include <unicode/uidna.h>
64
#include <unicode/ulocdata.h>
65
#include <unicode/urename.h>
66
#include <unicode/ustring.h>
67
#include <unicode/utf16.h>
68
#include <unicode/utf8.h>
69
#include <unicode/utypes.h>
70
#include <unicode/uvernum.h>
71
#include <unicode/uversion.h>
72
73
#ifdef NODE_HAVE_SMALL_ICU
74
/* if this is defined, we have a 'secondary' entry point.
75
   compare following to utypes.h defs for U_ICUDATA_ENTRY_POINT */
76
#define SMALL_ICUDATA_ENTRY_POINT \
77
  SMALL_DEF2(U_ICU_VERSION_MAJOR_NUM, U_LIB_SUFFIX_C_NAME)
78
#define SMALL_DEF2(major, suff) SMALL_DEF(major, suff)
79
#ifndef U_LIB_SUFFIX_C_NAME
80
#define SMALL_DEF(major, suff) icusmdt##major##_dat
81
#else
82
#define SMALL_DEF(major, suff) icusmdt##suff##major##_dat
83
#endif
84
85
extern "C" const char U_DATA_API SMALL_ICUDATA_ENTRY_POINT[];
86
#endif
87
88
namespace node {
89
90
using v8::Context;
91
using v8::FunctionCallbackInfo;
92
using v8::FunctionTemplate;
93
using v8::Int32;
94
using v8::Isolate;
95
using v8::Local;
96
using v8::MaybeLocal;
97
using v8::NewStringType;
98
using v8::Object;
99
using v8::ObjectTemplate;
100
using v8::String;
101
using v8::Value;
102
103
namespace i18n {
104
namespace {
105
106
template <typename T>
107
4
MaybeLocal<Object> ToBufferEndian(Environment* env, MaybeStackBuffer<T>* buf) {
108
4
  MaybeLocal<Object> ret = Buffer::New(env, buf);
109
4
  if (ret.IsEmpty())
110
0
    return ret;
111
112
4
  static_assert(sizeof(T) == 1 || sizeof(T) == 2,
113
4
                "Currently only one- or two-byte buffers are supported");
114
4
  if (sizeof(T) > 1 && IsBigEndian()) {
115
0
    SPREAD_BUFFER_ARG(ret.ToLocalChecked(), retbuf);
116
0
    SwapBytes16(retbuf_data, retbuf_length);
117
0
  }
118
119
4
  return ret;
120
4
}
node_i18n.cc:v8::MaybeLocal<v8::Object> node::i18n::(anonymous namespace)::ToBufferEndian<char>(node::Environment*, node::MaybeStackBuffer<char, 1024ul>*)
Line
Count
Source
107
4
MaybeLocal<Object> ToBufferEndian(Environment* env, MaybeStackBuffer<T>* buf) {
108
4
  MaybeLocal<Object> ret = Buffer::New(env, buf);
109
4
  if (ret.IsEmpty())
110
0
    return ret;
111
112
4
  static_assert(sizeof(T) == 1 || sizeof(T) == 2,
113
4
                "Currently only one- or two-byte buffers are supported");
114
4
  if (sizeof(T) > 1 && IsBigEndian()) {
115
0
    SPREAD_BUFFER_ARG(ret.ToLocalChecked(), retbuf);
116
0
    SwapBytes16(retbuf_data, retbuf_length);
117
0
  }
118
119
4
  return ret;
120
4
}
Unexecuted instantiation: node_i18n.cc:v8::MaybeLocal<v8::Object> node::i18n::(anonymous namespace)::ToBufferEndian<char16_t>(node::Environment*, node::MaybeStackBuffer<char16_t, 1024ul>*)
121
122
// One-Shot Converters
123
124
void CopySourceBuffer(MaybeStackBuffer<UChar>* dest,
125
                      const char* data,
126
                      const size_t length,
127
0
                      const size_t length_in_chars) {
128
0
  dest->AllocateSufficientStorage(length_in_chars);
129
0
  char* dst = reinterpret_cast<char*>(**dest);
130
0
  memcpy(dst, data, length);
131
0
  if (IsBigEndian()) {
132
0
    SwapBytes16(dst, length);
133
0
  }
134
0
}
135
136
typedef MaybeLocal<Object> (*TranscodeFunc)(Environment* env,
137
                                            const char* fromEncoding,
138
                                            const char* toEncoding,
139
                                            const char* source,
140
                                            const size_t source_length,
141
                                            UErrorCode* status);
142
143
MaybeLocal<Object> Transcode(Environment* env,
144
                             const char* fromEncoding,
145
                             const char* toEncoding,
146
                             const char* source,
147
                             const size_t source_length,
148
4
                             UErrorCode* status) {
149
4
  *status = U_ZERO_ERROR;
150
4
  MaybeLocal<Object> ret;
151
4
  MaybeStackBuffer<char> result;
152
4
  Converter to(toEncoding);
153
4
  Converter from(fromEncoding);
154
155
4
  size_t sublen = ucnv_getMinCharSize(to.conv());
156
4
  std::string sub(sublen, '?');
157
4
  to.set_subst_chars(sub.c_str());
158
159
4
  const uint32_t limit = source_length * to.max_char_size();
160
4
  result.AllocateSufficientStorage(limit);
161
4
  char* target = *result;
162
4
  ucnv_convertEx(to.conv(), from.conv(), &target, target + limit,
163
4
                 &source, source + source_length, nullptr, nullptr,
164
4
                 nullptr, nullptr, true, true, status);
165
4
  if (U_SUCCESS(*status)) {
166
4
    result.SetLength(target - &result[0]);
167
4
    ret = ToBufferEndian(env, &result);
168
4
  }
169
4
  return ret;
170
4
}
171
172
MaybeLocal<Object> TranscodeToUcs2(Environment* env,
173
                                   const char* fromEncoding,
174
                                   const char* toEncoding,
175
                                   const char* source,
176
                                   const size_t source_length,
177
0
                                   UErrorCode* status) {
178
0
  *status = U_ZERO_ERROR;
179
0
  MaybeLocal<Object> ret;
180
0
  MaybeStackBuffer<UChar> destbuf(source_length);
181
0
  Converter from(fromEncoding);
182
0
  const size_t length_in_chars = source_length * sizeof(UChar);
183
0
  ucnv_toUChars(from.conv(), *destbuf, length_in_chars,
184
0
                source, source_length, status);
185
0
  if (U_SUCCESS(*status))
186
0
    ret = ToBufferEndian(env, &destbuf);
187
0
  return ret;
188
0
}
189
190
MaybeLocal<Object> TranscodeFromUcs2(Environment* env,
191
                                     const char* fromEncoding,
192
                                     const char* toEncoding,
193
                                     const char* source,
194
                                     const size_t source_length,
195
0
                                     UErrorCode* status) {
196
0
  *status = U_ZERO_ERROR;
197
0
  MaybeStackBuffer<UChar> sourcebuf;
198
0
  MaybeLocal<Object> ret;
199
0
  Converter to(toEncoding);
200
201
0
  size_t sublen = ucnv_getMinCharSize(to.conv());
202
0
  std::string sub(sublen, '?');
203
0
  to.set_subst_chars(sub.c_str());
204
205
0
  const size_t length_in_chars = source_length / sizeof(UChar);
206
0
  CopySourceBuffer(&sourcebuf, source, source_length, length_in_chars);
207
0
  MaybeStackBuffer<char> destbuf(length_in_chars);
208
0
  const uint32_t len = ucnv_fromUChars(to.conv(), *destbuf, length_in_chars,
209
0
                                       *sourcebuf, length_in_chars, status);
210
0
  if (U_SUCCESS(*status)) {
211
0
    destbuf.SetLength(len);
212
0
    ret = ToBufferEndian(env, &destbuf);
213
0
  }
214
0
  return ret;
215
0
}
216
217
MaybeLocal<Object> TranscodeUcs2FromUtf8(Environment* env,
218
                                         const char* fromEncoding,
219
                                         const char* toEncoding,
220
                                         const char* source,
221
                                         const size_t source_length,
222
0
                                         UErrorCode* status) {
223
0
  *status = U_ZERO_ERROR;
224
0
  MaybeStackBuffer<UChar> destbuf;
225
0
  int32_t result_length;
226
0
  u_strFromUTF8(*destbuf, destbuf.capacity(), &result_length,
227
0
                source, source_length, status);
228
0
  MaybeLocal<Object> ret;
229
0
  if (U_SUCCESS(*status)) {
230
0
    destbuf.SetLength(result_length);
231
0
    ret = ToBufferEndian(env, &destbuf);
232
0
  } else if (*status == U_BUFFER_OVERFLOW_ERROR) {
233
0
    *status = U_ZERO_ERROR;
234
0
    destbuf.AllocateSufficientStorage(result_length);
235
0
    u_strFromUTF8(*destbuf, result_length, &result_length,
236
0
                  source, source_length, status);
237
0
    if (U_SUCCESS(*status)) {
238
0
      destbuf.SetLength(result_length);
239
0
      ret = ToBufferEndian(env, &destbuf);
240
0
    }
241
0
  }
242
0
  return ret;
243
0
}
244
245
MaybeLocal<Object> TranscodeUtf8FromUcs2(Environment* env,
246
                                         const char* fromEncoding,
247
                                         const char* toEncoding,
248
                                         const char* source,
249
                                         const size_t source_length,
250
0
                                         UErrorCode* status) {
251
0
  *status = U_ZERO_ERROR;
252
0
  MaybeLocal<Object> ret;
253
0
  const size_t length_in_chars = source_length / sizeof(UChar);
254
0
  int32_t result_length;
255
0
  MaybeStackBuffer<UChar> sourcebuf;
256
0
  MaybeStackBuffer<char> destbuf;
257
0
  CopySourceBuffer(&sourcebuf, source, source_length, length_in_chars);
258
0
  u_strToUTF8(*destbuf, destbuf.capacity(), &result_length,
259
0
              *sourcebuf, length_in_chars, status);
260
0
  if (U_SUCCESS(*status)) {
261
0
    destbuf.SetLength(result_length);
262
0
    ret = ToBufferEndian(env, &destbuf);
263
0
  } else if (*status == U_BUFFER_OVERFLOW_ERROR) {
264
0
    *status = U_ZERO_ERROR;
265
0
    destbuf.AllocateSufficientStorage(result_length);
266
0
    u_strToUTF8(*destbuf, result_length, &result_length, *sourcebuf,
267
0
                length_in_chars, status);
268
0
    if (U_SUCCESS(*status)) {
269
0
      destbuf.SetLength(result_length);
270
0
      ret = ToBufferEndian(env, &destbuf);
271
0
    }
272
0
  }
273
0
  return ret;
274
0
}
275
276
8
const char* EncodingName(const enum encoding encoding) {
277
8
  switch (encoding) {
278
4
    case ASCII: return "us-ascii";
279
0
    case LATIN1: return "iso8859-1";
280
0
    case UCS2: return "utf16le";
281
4
    case UTF8: return "utf-8";
282
0
    default: return nullptr;
283
8
  }
284
8
}
285
286
8
bool SupportedEncoding(const enum encoding encoding) {
287
8
  switch (encoding) {
288
4
    case ASCII:
289
4
    case LATIN1:
290
4
    case UCS2:
291
8
    case UTF8: return true;
292
0
    default: return false;
293
8
  }
294
8
}
295
296
4
void Transcode(const FunctionCallbackInfo<Value>&args) {
297
4
  Environment* env = Environment::GetCurrent(args);
298
4
  Isolate* isolate = env->isolate();
299
4
  UErrorCode status = U_ZERO_ERROR;
300
4
  MaybeLocal<Object> result;
301
302
4
  ArrayBufferViewContents<char> input(args[0]);
303
4
  const enum encoding fromEncoding = ParseEncoding(isolate, args[1], BUFFER);
304
4
  const enum encoding toEncoding = ParseEncoding(isolate, args[2], BUFFER);
305
306
4
  if (SupportedEncoding(fromEncoding) && SupportedEncoding(toEncoding)) {
307
4
    TranscodeFunc tfn = &Transcode;
308
4
    switch (fromEncoding) {
309
0
      case ASCII:
310
0
      case LATIN1:
311
0
        if (toEncoding == UCS2)
312
0
          tfn = &TranscodeToUcs2;
313
0
        break;
314
4
      case UTF8:
315
4
        if (toEncoding == UCS2)
316
0
          tfn = &TranscodeUcs2FromUtf8;
317
4
        break;
318
0
      case UCS2:
319
0
        switch (toEncoding) {
320
0
          case UCS2:
321
0
            tfn = &Transcode;
322
0
            break;
323
0
          case UTF8:
324
0
            tfn = &TranscodeUtf8FromUcs2;
325
0
            break;
326
0
          default:
327
0
            tfn = &TranscodeFromUcs2;
328
0
        }
329
0
        break;
330
0
      default:
331
        // This should not happen because of the SupportedEncoding checks
332
0
        ABORT();
333
4
    }
334
335
4
    result = tfn(env, EncodingName(fromEncoding), EncodingName(toEncoding),
336
4
                 input.data(), input.length(), &status);
337
4
  } else {
338
0
    status = U_ILLEGAL_ARGUMENT_ERROR;
339
0
  }
340
341
4
  if (result.IsEmpty())
342
0
    return args.GetReturnValue().Set(status);
343
344
4
  return args.GetReturnValue().Set(result.ToLocalChecked());
345
4
}
346
347
0
void ICUErrorName(const FunctionCallbackInfo<Value>& args) {
348
0
  Environment* env = Environment::GetCurrent(args);
349
0
  CHECK(args[0]->IsInt32());
350
0
  UErrorCode status = static_cast<UErrorCode>(args[0].As<Int32>()->Value());
351
0
  args.GetReturnValue().Set(
352
0
      String::NewFromUtf8(env->isolate(),
353
0
                          u_errorName(status)).ToLocalChecked());
354
0
}
355
356
}  // anonymous namespace
357
358
8
Converter::Converter(const char* name, const char* sub) {
359
8
  UErrorCode status = U_ZERO_ERROR;
360
8
  UConverter* conv = ucnv_open(name, &status);
361
8
  CHECK(U_SUCCESS(status));
362
8
  conv_.reset(conv);
363
8
  set_subst_chars(sub);
364
8
}
365
366
Converter::Converter(UConverter* converter, const char* sub)
367
0
    : conv_(converter) {
368
0
  set_subst_chars(sub);
369
0
}
370
371
12
void Converter::set_subst_chars(const char* sub) {
372
12
  CHECK(conv_);
373
12
  UErrorCode status = U_ZERO_ERROR;
374
12
  if (sub != nullptr) {
375
4
    ucnv_setSubstChars(conv_.get(), sub, strlen(sub), &status);
376
4
    CHECK(U_SUCCESS(status));
377
4
  }
378
12
}
379
380
0
void Converter::reset() {
381
0
  ucnv_reset(conv_.get());
382
0
}
383
384
0
size_t Converter::min_char_size() const {
385
0
  CHECK(conv_);
386
0
  return ucnv_getMinCharSize(conv_.get());
387
0
}
388
389
4
size_t Converter::max_char_size() const {
390
4
  CHECK(conv_);
391
4
  return ucnv_getMaxCharSize(conv_.get());
392
4
}
393
394
0
void ConverterObject::Has(const FunctionCallbackInfo<Value>& args) {
395
0
  Environment* env = Environment::GetCurrent(args);
396
397
0
  CHECK_GE(args.Length(), 1);
398
0
  Utf8Value label(env->isolate(), args[0]);
399
400
0
  UErrorCode status = U_ZERO_ERROR;
401
0
  ConverterPointer conv(ucnv_open(*label, &status));
402
0
  args.GetReturnValue().Set(!!U_SUCCESS(status));
403
0
}
404
405
0
void ConverterObject::Create(const FunctionCallbackInfo<Value>& args) {
406
0
  Environment* env = Environment::GetCurrent(args);
407
408
0
  Local<ObjectTemplate> t = env->i18n_converter_template();
409
0
  Local<Object> obj;
410
0
  if (!t->NewInstance(env->context()).ToLocal(&obj)) return;
411
412
0
  CHECK_GE(args.Length(), 2);
413
0
  Utf8Value label(env->isolate(), args[0]);
414
0
  int flags = args[1]->Uint32Value(env->context()).ToChecked();
415
0
  bool fatal =
416
0
      (flags & CONVERTER_FLAGS_FATAL) == CONVERTER_FLAGS_FATAL;
417
418
0
  UErrorCode status = U_ZERO_ERROR;
419
0
  UConverter* conv = ucnv_open(*label, &status);
420
0
  if (U_FAILURE(status))
421
0
    return;
422
423
0
  if (fatal) {
424
0
    status = U_ZERO_ERROR;
425
0
    ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_STOP,
426
0
                        nullptr, nullptr, nullptr, &status);
427
0
  }
428
429
0
  auto converter = new ConverterObject(env, obj, conv, flags);
430
0
  size_t sublen = ucnv_getMinCharSize(conv);
431
0
  std::string sub(sublen, '?');
432
0
  converter->set_subst_chars(sub.c_str());
433
434
0
  args.GetReturnValue().Set(obj);
435
0
}
436
437
0
void ConverterObject::Decode(const FunctionCallbackInfo<Value>& args) {
438
0
  Environment* env = Environment::GetCurrent(args);
439
440
0
  CHECK_GE(args.Length(), 4);  // Converter, Buffer, Flags, Encoding
441
442
0
  ConverterObject* converter;
443
0
  ASSIGN_OR_RETURN_UNWRAP(&converter, args[0].As<Object>());
444
445
0
  if (!(args[1]->IsArrayBuffer() || args[1]->IsSharedArrayBuffer() ||
446
0
        args[1]->IsArrayBufferView())) {
447
0
    return node::THROW_ERR_INVALID_ARG_TYPE(
448
0
        env->isolate(),
449
0
        "The \"input\" argument must be an instance of SharedArrayBuffer, "
450
0
        "ArrayBuffer or ArrayBufferView.");
451
0
  }
452
453
0
  ArrayBufferViewContents<char> input(args[1]);
454
0
  int flags = args[2]->Uint32Value(env->context()).ToChecked();
455
456
0
  CHECK(args[3]->IsString());
457
0
  Local<String> from_encoding = args[3].As<String>();
458
459
0
  UErrorCode status = U_ZERO_ERROR;
460
0
  MaybeStackBuffer<UChar> result;
461
462
0
  UBool flush = (flags & CONVERTER_FLAGS_FLUSH) == CONVERTER_FLAGS_FLUSH;
463
464
  // When flushing the final chunk, the limit is the maximum
465
  // of either the input buffer length or the number of pending
466
  // characters times the min char size, multiplied by 2 as unicode may
467
  // take up to 2 UChars to encode a character
468
0
  size_t limit = 2 * converter->min_char_size() *
469
0
      (!flush ?
470
0
          input.length() :
471
0
          std::max(
472
0
              input.length(),
473
0
              static_cast<size_t>(
474
0
                  ucnv_toUCountPending(converter->conv(), &status))));
475
0
  status = U_ZERO_ERROR;
476
477
0
  if (limit > 0)
478
0
    result.AllocateSufficientStorage(limit);
479
480
0
  auto cleanup = OnScopeLeave([&]() {
481
0
    if (flush) {
482
      // Reset the converter state.
483
0
      converter->set_bom_seen(false);
484
0
      converter->reset();
485
0
    }
486
0
  });
487
488
0
  const char* source = input.data();
489
0
  size_t source_length = input.length();
490
491
0
  UChar* target = *result;
492
0
  ucnv_toUnicode(converter->conv(),
493
0
                 &target,
494
0
                 target + limit,
495
0
                 &source,
496
0
                 source + source_length,
497
0
                 nullptr,
498
0
                 flush,
499
0
                 &status);
500
501
0
  if (U_SUCCESS(status)) {
502
0
    bool omit_initial_bom = false;
503
0
    if (limit > 0) {
504
0
      result.SetLength(target - &result[0]);
505
0
      if (result.length() > 0 &&
506
0
          converter->unicode() &&
507
0
          !converter->ignore_bom() &&
508
0
          !converter->bom_seen()) {
509
        // If the very first result in the stream is a BOM, and we are not
510
        // explicitly told to ignore it, then we mark it for discarding.
511
0
        if (result[0] == 0xFEFF)
512
0
          omit_initial_bom = true;
513
0
        converter->set_bom_seen(true);
514
0
      }
515
0
    }
516
517
0
    Local<Value> error;
518
0
    UChar* output = result.out();
519
0
    size_t beginning = 0;
520
0
    size_t length = result.length() * sizeof(UChar);
521
522
0
    if (omit_initial_bom) {
523
      // Perform `ret = ret.slice(2)`.
524
0
      beginning += 2;
525
0
      length -= 2;
526
0
    }
527
528
0
    char* value = reinterpret_cast<char*>(output) + beginning;
529
530
0
    if (IsBigEndian()) {
531
0
      SwapBytes16(value, length);
532
0
    }
533
534
0
    MaybeLocal<Value> encoded =
535
0
        StringBytes::Encode(env->isolate(), value, length, UCS2, &error);
536
537
0
    Local<Value> ret;
538
0
    if (encoded.ToLocal(&ret)) {
539
0
      args.GetReturnValue().Set(ret);
540
0
      return;
541
0
    }
542
0
  }
543
544
0
  node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
545
0
      env->isolate(),
546
0
      "The encoded data was not valid for encoding %s",
547
0
      *node::Utf8Value(env->isolate(), from_encoding));
548
0
}
549
550
ConverterObject::ConverterObject(
551
    Environment* env,
552
    Local<Object> wrap,
553
    UConverter* converter,
554
    int flags,
555
    const char* sub)
556
0
    : BaseObject(env, wrap),
557
0
      Converter(converter, sub),
558
0
      flags_(flags) {
559
0
  MakeWeak();
560
561
0
  switch (ucnv_getType(converter)) {
562
0
    case UCNV_UTF8:
563
0
    case UCNV_UTF16_BigEndian:
564
0
    case UCNV_UTF16_LittleEndian:
565
0
      flags_ |= CONVERTER_FLAGS_UNICODE;
566
0
      break;
567
0
    default: {
568
      // Fall through
569
0
    }
570
0
  }
571
0
}
572
573
126k
bool InitializeICUDirectory(const std::string& path, std::string* error) {
574
126k
  UErrorCode status = U_ZERO_ERROR;
575
126k
  if (path.empty()) {
576
#ifdef NODE_HAVE_SMALL_ICU
577
    // install the 'small' data.
578
    udata_setCommonData(&SMALL_ICUDATA_ENTRY_POINT, &status);
579
#else  // !NODE_HAVE_SMALL_ICU
580
    // no small data, so nothing to do.
581
126k
#endif  // !NODE_HAVE_SMALL_ICU
582
126k
  } else {
583
0
    u_setDataDirectory(path.c_str());
584
0
    u_init(&status);
585
0
  }
586
126k
  if (status == U_ZERO_ERROR) {
587
126k
    return true;
588
126k
  }
589
590
0
  *error = u_errorName(status);
591
0
  return false;
592
126k
}
593
594
0
void SetDefaultTimeZone(const char* tzid) {
595
0
  size_t tzidlen = strlen(tzid) + 1;
596
0
  UErrorCode status = U_ZERO_ERROR;
597
0
  MaybeStackBuffer<UChar, 256> id(tzidlen);
598
0
  u_charsToUChars(tzid, id.out(), tzidlen);
599
  // This is threadsafe:
600
0
  ucal_setDefaultTimeZone(id.out(), &status);
601
0
  CHECK(U_SUCCESS(status));
602
0
}
603
604
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
605
                  const char* input,
606
0
                  size_t length) {
607
0
  UErrorCode status = U_ZERO_ERROR;
608
0
  uint32_t options = UIDNA_NONTRANSITIONAL_TO_UNICODE;
609
0
  UIDNA* uidna = uidna_openUTS46(options, &status);
610
0
  if (U_FAILURE(status))
611
0
    return -1;
612
0
  UIDNAInfo info = UIDNA_INFO_INITIALIZER;
613
614
0
  int32_t len = uidna_nameToUnicodeUTF8(uidna,
615
0
                                        input, length,
616
0
                                        **buf, buf->capacity(),
617
0
                                        &info,
618
0
                                        &status);
619
620
  // Do not check info.errors like we do with ToASCII since ToUnicode always
621
  // returns a string, despite any possible errors that may have occurred.
622
623
0
  if (status == U_BUFFER_OVERFLOW_ERROR) {
624
0
    status = U_ZERO_ERROR;
625
0
    buf->AllocateSufficientStorage(len);
626
0
    len = uidna_nameToUnicodeUTF8(uidna,
627
0
                                  input, length,
628
0
                                  **buf, buf->capacity(),
629
0
                                  &info,
630
0
                                  &status);
631
0
  }
632
633
  // info.errors is ignored as UTS #46 ToUnicode always produces a Unicode
634
  // string, regardless of whether an error occurred.
635
636
0
  if (U_FAILURE(status)) {
637
0
    len = -1;
638
0
    buf->SetLength(0);
639
0
  } else {
640
0
    buf->SetLength(len);
641
0
  }
642
643
0
  uidna_close(uidna);
644
0
  return len;
645
0
}
646
647
int32_t ToASCII(MaybeStackBuffer<char>* buf,
648
                const char* input,
649
                size_t length,
650
0
                idna_mode mode) {
651
0
  UErrorCode status = U_ZERO_ERROR;
652
0
  uint32_t options =                  // CheckHyphens = false; handled later
653
0
    UIDNA_CHECK_BIDI |                // CheckBidi = true
654
0
    UIDNA_CHECK_CONTEXTJ |            // CheckJoiners = true
655
0
    UIDNA_NONTRANSITIONAL_TO_ASCII;   // Nontransitional_Processing
656
0
  if (mode == idna_mode::kStrict) {
657
0
    options |= UIDNA_USE_STD3_RULES;  // UseSTD3ASCIIRules = beStrict
658
                                      // VerifyDnsLength = beStrict;
659
                                      //   handled later
660
0
  }
661
662
0
  UIDNA* uidna = uidna_openUTS46(options, &status);
663
0
  if (U_FAILURE(status))
664
0
    return -1;
665
0
  UIDNAInfo info = UIDNA_INFO_INITIALIZER;
666
667
0
  int32_t len = uidna_nameToASCII_UTF8(uidna,
668
0
                                       input, length,
669
0
                                       **buf, buf->capacity(),
670
0
                                       &info,
671
0
                                       &status);
672
673
0
  if (status == U_BUFFER_OVERFLOW_ERROR) {
674
0
    status = U_ZERO_ERROR;
675
0
    buf->AllocateSufficientStorage(len);
676
0
    len = uidna_nameToASCII_UTF8(uidna,
677
0
                                 input, length,
678
0
                                 **buf, buf->capacity(),
679
0
                                 &info,
680
0
                                 &status);
681
0
  }
682
683
  // In UTS #46 which specifies ToASCII, certain error conditions are
684
  // configurable through options, and the WHATWG URL Standard promptly elects
685
  // to disable some of them to accommodate for real-world use cases.
686
  // Unfortunately, ICU4C's IDNA module does not support disabling some of
687
  // these options through `options` above, and thus continues throwing
688
  // unnecessary errors. To counter this situation, we just filter out the
689
  // errors that may have happened afterwards, before deciding whether to
690
  // return an error from this function.
691
692
  // CheckHyphens = false
693
  // (Specified in the current UTS #46 draft rev. 18.)
694
  // Refs:
695
  // - https://github.com/whatwg/url/issues/53
696
  // - https://github.com/whatwg/url/pull/309
697
  // - http://www.unicode.org/review/pri317/
698
  // - http://www.unicode.org/reports/tr46/tr46-18.html
699
  // - https://www.icann.org/news/announcement-2000-01-07-en
700
0
  info.errors &= ~UIDNA_ERROR_HYPHEN_3_4;
701
0
  info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN;
702
0
  info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN;
703
704
0
  if (mode != idna_mode::kStrict) {
705
    // VerifyDnsLength = beStrict
706
0
    info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
707
0
    info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
708
0
    info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
709
0
  }
710
711
0
  if (U_FAILURE(status) || (mode != idna_mode::kLenient && info.errors != 0)) {
712
0
    len = -1;
713
0
    buf->SetLength(0);
714
0
  } else {
715
0
    buf->SetLength(len);
716
0
  }
717
718
0
  uidna_close(uidna);
719
0
  return len;
720
0
}
721
722
0
static void ToUnicode(const FunctionCallbackInfo<Value>& args) {
723
0
  Environment* env = Environment::GetCurrent(args);
724
0
  CHECK_GE(args.Length(), 1);
725
0
  CHECK(args[0]->IsString());
726
0
  Utf8Value val(env->isolate(), args[0]);
727
728
0
  MaybeStackBuffer<char> buf;
729
0
  int32_t len = ToUnicode(&buf, *val, val.length());
730
731
0
  if (len < 0) {
732
0
    return THROW_ERR_INVALID_ARG_VALUE(env, "Cannot convert name to Unicode");
733
0
  }
734
735
0
  args.GetReturnValue().Set(
736
0
      String::NewFromUtf8(env->isolate(),
737
0
                          *buf,
738
0
                          NewStringType::kNormal,
739
0
                          len).ToLocalChecked());
740
0
}
741
742
0
static void ToASCII(const FunctionCallbackInfo<Value>& args) {
743
0
  Environment* env = Environment::GetCurrent(args);
744
0
  CHECK_GE(args.Length(), 1);
745
0
  CHECK(args[0]->IsString());
746
0
  Utf8Value val(env->isolate(), args[0]);
747
  // optional arg
748
0
  bool lenient = args[1]->BooleanValue(env->isolate());
749
0
  idna_mode mode = lenient ? idna_mode::kLenient : idna_mode::kDefault;
750
751
0
  MaybeStackBuffer<char> buf;
752
0
  int32_t len = ToASCII(&buf, *val, val.length(), mode);
753
754
0
  if (len < 0) {
755
0
    return THROW_ERR_INVALID_ARG_VALUE(env, "Cannot convert name to ASCII");
756
0
  }
757
758
0
  args.GetReturnValue().Set(
759
0
      String::NewFromUtf8(env->isolate(),
760
0
                          *buf,
761
0
                          NewStringType::kNormal,
762
0
                          len).ToLocalChecked());
763
0
}
764
765
// This is similar to wcwidth except that it takes the current unicode
766
// character properties database into consideration, allowing it to
767
// correctly calculate the column widths of things like emoji's and
768
// newer wide characters. wcwidth, on the other hand, uses a fixed
769
// algorithm that does not take things like emoji into proper
770
// consideration.
771
//
772
// TODO(TimothyGu): Investigate Cc (C0/C1 control codes). Both VTE (used by
773
// GNOME Terminal) and Konsole don't consider them to be zero-width (see refs
774
// below), and when printed in VTE it is Narrow. However GNOME Terminal doesn't
775
// allow it to be input. Linux's PTY terminal prints control characters as
776
// Narrow rhombi.
777
//
778
// TODO(TimothyGu): Investigate Hangul jamo characters. Medial vowels and final
779
// consonants are 0-width when combined with initial consonants; otherwise they
780
// are technically Wide. But many terminals (including Konsole and
781
// VTE/GLib-based) implement all medials and finals as 0-width.
782
//
783
// Refs: https://eev.ee/blog/2015/09/12/dark-corners-of-unicode/#combining-characters-and-character-width
784
// Refs: https://github.com/GNOME/glib/blob/79e4d4c6be/glib/guniprop.c#L388-L420
785
// Refs: https://github.com/KDE/konsole/blob/8c6a5d13c0/src/konsole_wcwidth.cpp#L101-L223
786
static int GetColumnWidth(UChar32 codepoint,
787
0
                          bool ambiguous_as_full_width = false) {
788
  // UCHAR_EAST_ASIAN_WIDTH is the Unicode property that identifies a
789
  // codepoint as being full width, wide, ambiguous, neutral, narrow,
790
  // or halfwidth.
791
0
  const int eaw = u_getIntPropertyValue(codepoint, UCHAR_EAST_ASIAN_WIDTH);
792
0
  switch (eaw) {
793
0
    case U_EA_FULLWIDTH:
794
0
    case U_EA_WIDE:
795
0
      return 2;
796
0
    case U_EA_AMBIGUOUS:
797
      // See: http://www.unicode.org/reports/tr11/#Ambiguous for details
798
0
      if (ambiguous_as_full_width) {
799
0
        return 2;
800
0
      }
801
      // If ambiguous_as_full_width is false:
802
0
      [[fallthrough]];
803
0
    case U_EA_NEUTRAL:
804
0
      if (u_hasBinaryProperty(codepoint, UCHAR_EMOJI_PRESENTATION)) {
805
0
        return 2;
806
0
      }
807
0
      [[fallthrough]];
808
0
    case U_EA_HALFWIDTH:
809
0
    case U_EA_NARROW:
810
0
    default:
811
0
      const auto zero_width_mask = U_GC_CC_MASK |  // C0/C1 control code
812
0
                                  U_GC_CF_MASK |  // Format control character
813
0
                                  U_GC_ME_MASK |  // Enclosing mark
814
0
                                  U_GC_MN_MASK;   // Nonspacing mark
815
0
      if (codepoint != 0x00AD &&  // SOFT HYPHEN is Cf but not zero-width
816
0
          ((U_MASK(u_charType(codepoint)) & zero_width_mask) ||
817
0
          u_hasBinaryProperty(codepoint, UCHAR_EMOJI_MODIFIER))) {
818
0
        return 0;
819
0
      }
820
0
      return 1;
821
0
  }
822
0
}
823
824
// Returns the column width for the given String.
825
0
static void GetStringWidth(const FunctionCallbackInfo<Value>& args) {
826
0
  Environment* env = Environment::GetCurrent(args);
827
0
  CHECK(args[0]->IsString());
828
829
0
  bool ambiguous_as_full_width = args[1]->IsTrue();
830
0
  bool expand_emoji_sequence = !args[2]->IsBoolean() || args[2]->IsTrue();
831
832
0
  TwoByteValue value(env->isolate(), args[0]);
833
  // reinterpret_cast is required by windows to compile
834
0
  UChar* str = reinterpret_cast<UChar*>(*value);
835
0
  static_assert(sizeof(*str) == sizeof(**value),
836
0
                "sizeof(*str) == sizeof(**value)");
837
0
  UChar32 c = 0;
838
0
  UChar32 p;
839
0
  size_t n = 0;
840
0
  uint32_t width = 0;
841
842
0
  while (n < value.length()) {
843
0
    p = c;
844
0
    U16_NEXT(str, n, value.length(), c);
845
    // Don't count individual emoji codepoints that occur within an
846
    // emoji sequence. This is not necessarily foolproof. Some
847
    // environments display emoji sequences in the appropriate
848
    // condensed form (as a single emoji glyph), other environments
849
    // may not understand an emoji sequence and will display each
850
    // individual emoji separately. When this happens, the width
851
    // calculated will be off, and there's no reliable way of knowing
852
    // in advance if a particular sequence is going to be supported.
853
    // The expand_emoji_sequence option allows the caller to skip this
854
    // check and count each code within an emoji sequence separately.
855
    // https://www.unicode.org/reports/tr51/tr51-16.html#Emoji_ZWJ_Sequences
856
0
    if (!expand_emoji_sequence &&
857
0
        n > 0 && p == 0x200d &&  // 0x200d == ZWJ (zero width joiner)
858
0
        (u_hasBinaryProperty(c, UCHAR_EMOJI_PRESENTATION) ||
859
0
         u_hasBinaryProperty(c, UCHAR_EMOJI_MODIFIER))) {
860
0
      continue;
861
0
    }
862
0
    width += GetColumnWidth(c, ambiguous_as_full_width);
863
0
  }
864
0
  args.GetReturnValue().Set(width);
865
0
}
866
867
static void CreatePerIsolateProperties(IsolateData* isolate_data,
868
127k
                                       Local<ObjectTemplate> target) {
869
127k
  Isolate* isolate = isolate_data->isolate();
870
871
127k
  SetMethod(isolate, target, "toUnicode", ToUnicode);
872
127k
  SetMethod(isolate, target, "toASCII", ToASCII);
873
127k
  SetMethod(isolate, target, "getStringWidth", GetStringWidth);
874
875
  // One-shot converters
876
127k
  SetMethod(isolate, target, "icuErrName", ICUErrorName);
877
127k
  SetMethod(isolate, target, "transcode", Transcode);
878
879
  // ConverterObject
880
127k
  {
881
127k
    Local<FunctionTemplate> t = NewFunctionTemplate(isolate, nullptr);
882
127k
    t->InstanceTemplate()->SetInternalFieldCount(
883
127k
        ConverterObject::kInternalFieldCount);
884
127k
    Local<String> converter_string =
885
127k
        FIXED_ONE_BYTE_STRING(isolate, "Converter");
886
127k
    t->SetClassName(converter_string);
887
127k
    isolate_data->set_i18n_converter_template(t->InstanceTemplate());
888
127k
  }
889
890
127k
  SetMethod(isolate, target, "getConverter", ConverterObject::Create);
891
127k
  SetMethod(isolate, target, "decode", ConverterObject::Decode);
892
127k
  SetMethod(isolate, target, "hasConverter", ConverterObject::Has);
893
127k
}
894
895
void CreatePerContextProperties(Local<Object> target,
896
                                Local<Value> unused,
897
                                Local<Context> context,
898
127k
                                void* priv) {}
899
900
0
// Registers every native callback exposed by this binding with `registry`.
// The list mirrors the methods installed in CreatePerIsolateProperties.
void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
  // String helpers
  registry->Register(ToUnicode);
  registry->Register(ToASCII);
  registry->Register(GetStringWidth);

  // One-shot converters
  registry->Register(ICUErrorName);
  registry->Register(Transcode);

  // ConverterObject
  registry->Register(ConverterObject::Create);
  registry->Register(ConverterObject::Decode);
  registry->Register(ConverterObject::Has);
}
910
911
}  // namespace i18n
912
}  // namespace node
913
914
NODE_BINDING_CONTEXT_AWARE_INTERNAL(icu, node::i18n::CreatePerContextProperties)
915
NODE_BINDING_PER_ISOLATE_INIT(icu, node::i18n::CreatePerIsolateProperties)
916
NODE_BINDING_EXTERNAL_REFERENCE(icu, node::i18n::RegisterExternalReferences)
917
918
#endif  // NODE_HAVE_I18N_SUPPORT