Coverage Report

Created: 2026-04-27 06:39

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/dovecot/src/lib/unichar.c
Line
Count
Source
1
/* Copyright (c) 2005-2018 Dovecot authors, see the included COPYING file */
2
3
#include "lib.h"
4
#include "array.h"
5
#include "str.h"
6
#include "bsearch-insert-pos.h"
7
#include "unicode-data.h"
8
#include "unicode-transform.h"
9
#include "unichar.h"
10
11
const unsigned char utf8_replacement_char[UTF8_REPLACEMENT_CHAR_LEN] =
12
  { 0xef, 0xbf, 0xbd }; /* 0xfffd */
13
14
static const uint8_t utf8_non1_bytes[256 - 192 - 2] = {
15
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
16
  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
17
};
18
19
const uint8_t *const uni_utf8_non1_bytes = utf8_non1_bytes;
20
21
unsigned int uni_strlen(const unichar_t *str)
22
0
{
23
0
  unsigned int len = 0;
24
25
0
  for (len = 0; str[len] != 0; len++) ;
26
27
0
  return len;
28
0
}
29
30
static int
31
uni_utf8_parse_char(const void *_buffer, size_t size, bool cstr,
32
        unichar_t *chr_r)
33
538M
{
34
538M
  static unichar_t lowest_valid_chr_table[] =
35
538M
    { 0, 0, 0x80, 0x800, 0x10000, 0x200000, 0x4000000 };
36
538M
  const unsigned char *input = _buffer;
37
538M
  unichar_t chr, lowest_valid_chr;
38
538M
  unsigned int i, len;
39
538M
  int ret;
40
41
538M
  i_assert(size > 0);
42
43
538M
  if (*input < 0x80) {
44
185M
    *chr_r = *input;
45
185M
    return 1;
46
185M
  }
47
48
  /* first byte has len highest bits set, followed by zero bit.
49
     the rest of the bits are used as the highest bits of the value. */
50
353M
  chr = *input;
51
353M
  len = uni_utf8_char_bytes(*input);
52
353M
  switch (len) {
53
79.9M
  case 2:
54
79.9M
    chr &= 0x1f;
55
79.9M
    break;
56
260M
  case 3:
57
260M
    chr &= 0x0f;
58
260M
    break;
59
2.99M
  case 4:
60
2.99M
    chr &= 0x07;
61
2.99M
    break;
62
178k
  case 5:
63
178k
    chr &= 0x03;
64
178k
    break;
65
94.6k
  case 6:
66
94.6k
    chr &= 0x01;
67
94.6k
    break;
68
9.88M
  default:
69
    /* only 7bit chars should have len==1 */
70
9.88M
    i_assert(len == 1);
71
9.88M
    return -1;
72
353M
  }
73
74
343M
  if (len <= size) {
75
343M
    lowest_valid_chr = lowest_valid_chr_table[len];
76
343M
    ret = len;
77
343M
  } else {
78
    /* check first if the input is invalid before returning 0 */
79
38.9k
    lowest_valid_chr = 0;
80
38.9k
    ret = 0;
81
38.9k
    len = size;
82
38.9k
  }
83
84
  /* the following bytes must all be 10xxxxxx */
85
936M
  for (i = 1; i < len; i++) {
86
602M
    if ((input[i] & 0xc0) != 0x80) {
87
9.18M
      return (cstr && size == SIZE_MAX && input[i] == '\0' ?
88
9.18M
        0 : -1);
89
9.18M
    }
90
91
592M
    chr <<= 6;
92
592M
    chr |= input[i] & 0x3f;
93
592M
  }
94
  /* these are specified as invalid encodings by standards
95
     see RFC3629 */
96
334M
  if (!uni_is_valid_ucs4(chr))
97
43.1k
    return -1;
98
334M
  if (chr < lowest_valid_chr) {
99
    /* overlong encoding */
100
2.34k
    return -1;
101
2.34k
  }
102
103
334M
  *chr_r = chr;
104
334M
  return ret;
105
334M
}
106
107
int uni_utf8_get_char(const char *input, unichar_t *chr_r)
108
34.7M
{
109
34.7M
  return uni_utf8_parse_char(input, SIZE_MAX, TRUE, chr_r);
110
34.7M
}
111
112
int uni_utf8_get_char_n(const void *input, size_t max_len, unichar_t *chr_r)
113
490M
{
114
490M
  return uni_utf8_parse_char(input, max_len, TRUE, chr_r);
115
490M
}
116
117
int uni_utf8_get_char_buf(const void *buffer, size_t size, unichar_t *chr_r)
118
12.6M
{
119
12.6M
  return uni_utf8_parse_char(buffer, size, FALSE, chr_r);
120
12.6M
}
121
122
int uni_utf8_to_ucs4(const char *input, ARRAY_TYPE(unichars) *output)
123
0
{
124
0
  unichar_t chr;
125
126
0
  while (*input != '\0') {
127
0
    int len = uni_utf8_get_char(input, &chr);
128
0
    if (len <= 0) {
129
      /* invalid input */
130
0
      return -1;
131
0
    }
132
0
                input += len;
133
134
0
    array_push_back(output, &chr);
135
0
  }
136
0
  return 0;
137
0
}
138
139
int uni_utf8_to_ucs4_n(const unsigned char *input, size_t size,
140
           ARRAY_TYPE(unichars) *output)
141
0
{
142
0
  unichar_t chr;
143
144
0
  while (size > 0) {
145
0
    int len = uni_utf8_get_char_n(input, size, &chr);
146
0
    if (len <= 0)
147
0
      return -1; /* invalid input */
148
0
    input += len; size -= len;
149
150
0
    array_push_back(output, &chr);
151
0
  }
152
0
  return 0;
153
0
}
154
155
void uni_ucs4_to_utf8(const unichar_t *input, size_t len, buffer_t *output)
156
5.27M
{
157
61.3M
  for (; len > 0 && *input != '\0'; input++, len--)
158
56.0M
    uni_ucs4_to_utf8_c(*input, output);
159
5.27M
}
160
161
void uni_ucs4_to_utf8_c(unichar_t chr, buffer_t *output)
162
56.1M
{
163
56.1M
  unsigned char first;
164
56.1M
  int bitpos;
165
166
56.1M
  if (chr < 0x80) {
167
38.0M
    buffer_append_c(output, chr);
168
38.0M
    return;
169
38.0M
  }
170
171
56.1M
  i_assert(uni_is_valid_ucs4(chr));
172
173
18.0M
  if (chr < (1 << (6 + 5))) {
174
    /* 110xxxxx */
175
511k
    bitpos = 6;
176
511k
    first = 0x80 | 0x40;
177
17.5M
  } else if (chr < (1 << ((2*6) + 4))) {
178
    /* 1110xxxx */
179
17.5M
    bitpos = 2*6;
180
17.5M
    first = 0x80 | 0x40 | 0x20;
181
17.5M
  } else if (chr < (1 << ((3*6) + 3))) {
182
    /* 11110xxx */
183
6.91k
    bitpos = 3*6;
184
6.91k
    first = 0x80 | 0x40 | 0x20 | 0x10;
185
6.91k
  } else if (chr < (1 << ((4*6) + 2))) {
186
    /* 111110xx */
187
0
    bitpos = 4*6;
188
0
    first = 0x80 | 0x40 | 0x20 | 0x10 | 0x08;
189
0
  } else {
190
    /* 1111110x */
191
0
    bitpos = 5*6;
192
0
    first = 0x80 | 0x40 | 0x20 | 0x10 | 0x08 | 0x04;
193
0
  }
194
18.0M
  buffer_append_c(output, first | (chr >> bitpos));
195
196
35.6M
  do {
197
35.6M
    bitpos -= 6;
198
35.6M
    buffer_append_c(output, 0x80 | ((chr >> bitpos) & 0x3f));
199
35.6M
  } while (bitpos > 0);
200
18.0M
}
201
202
unsigned int uni_utf8_strlen(const char *input)
203
0
{
204
0
  return uni_utf8_strlen_n(input, strlen(input));
205
0
}
206
207
unsigned int uni_utf8_strlen_n(const void *input, size_t size)
208
0
{
209
0
  size_t partial_pos;
210
211
0
  return uni_utf8_partial_strlen_n(input, size, &partial_pos);
212
0
}
213
214
unsigned int uni_utf8_partial_strlen_n(const void *_input, size_t size,
215
               size_t *partial_pos_r)
216
3.99M
{
217
3.99M
  const unsigned char *input = _input;
218
3.99M
  unsigned int count, len = 0;
219
3.99M
  size_t i;
220
221
365M
  for (i = 0; i < size; ) {
222
362M
    count = uni_utf8_char_bytes(input[i]);
223
362M
    if (i + count > size)
224
7.11k
      break;
225
362M
    i += count;
226
362M
    len++;
227
362M
  }
228
3.99M
  *partial_pos_r = i;
229
3.99M
  return len;
230
3.99M
}
231
232
unichar_t uni_ucs4_to_titlecase(unichar_t chr)
233
0
{
234
0
  const struct unicode_code_point_data *cp_data =
235
0
    unicode_code_point_get_data(chr);
236
237
0
  if (cp_data->simple_titlecase_mapping != 0x0000)
238
0
    return cp_data->simple_titlecase_mapping;
239
0
  return chr;
240
0
}
241
242
static void output_add_replacement_char(buffer_t *output)
243
230k
{
244
230k
  if (output->used >= UTF8_REPLACEMENT_CHAR_LEN &&
245
228k
      memcmp(CONST_PTR_OFFSET(output->data,
246
228k
            output->used - UTF8_REPLACEMENT_CHAR_LEN),
247
228k
       utf8_replacement_char, UTF8_REPLACEMENT_CHAR_LEN) == 0) {
248
    /* don't add the replacement char multiple times */
249
214k
    return;
250
214k
  }
251
15.7k
  buffer_append(output, utf8_replacement_char, UTF8_REPLACEMENT_CHAR_LEN);
252
15.7k
}
253
254
int uni_utf8_run_transform(const void *_input, size_t size,
255
         struct unicode_transform *trans, buffer_t *output,
256
         const char **error_r)
257
1.43M
{
258
1.43M
  struct unicode_transform *trans_last =
259
1.43M
    unicode_transform_get_last(trans);
260
1.43M
  struct unicode_buffer_sink sink;
261
1.43M
  const unsigned char *input = _input;
262
1.43M
  unichar_t chr;
263
1.43M
  ssize_t sret;
264
1.43M
  bool got_chr = FALSE, bad_cp = FALSE;
265
1.43M
  int ret = 0;
266
267
1.43M
  unicode_buffer_sink_init(&sink, output);
268
1.43M
  unicode_transform_chain(trans_last, &sink.transform);
269
270
150M
  while (size > 0 || got_chr) {
271
149M
    if (!got_chr) {
272
149M
      int bytes = uni_utf8_get_char_n(input, size, &chr);
273
149M
      if (bytes <= 0) {
274
        /* Invalid input. try the next byte. */
275
18.9M
        ret = -1;
276
18.9M
        input++; size--;
277
18.9M
        if (!bad_cp) {
278
4.09M
               chr = UNICODE_REPLACEMENT_CHAR;
279
4.09M
               bad_cp = TRUE;
280
4.09M
        }
281
130M
      } else {
282
130M
        input += bytes;
283
130M
        size -= bytes;
284
130M
        bad_cp = FALSE;
285
130M
      }
286
149M
    }
287
288
149M
    sret = unicode_transform_input(trans, &chr, 1, error_r);
289
149M
    if (sret < 0)
290
0
      return -1;
291
149M
    if (sret > 0)
292
148M
      got_chr = FALSE;
293
149M
  }
294
295
1.43M
  int fret = unicode_transform_flush(trans, error_r);
296
1.43M
  if (fret < 0)
297
0
    i_panic("unicode_transform_flush(): %s", *error_r);
298
1.43M
  i_assert(fret == 1);
299
1.43M
  return ret;
300
1.43M
}
301
302
static inline int
303
uni_utf8_write_nf_common(const void *_input, size_t size,
304
       enum unicode_nf_type nf_type, buffer_t *output)
305
1.43M
{
306
1.43M
  static struct unicode_nf_context ctx;
307
1.43M
  const char *error;
308
309
1.43M
  unicode_nf_init(&ctx, nf_type);
310
311
1.43M
  return uni_utf8_run_transform(_input, size, &ctx.transform, output,
312
1.43M
              &error);
313
1.43M
}
314
315
int uni_utf8_write_nfd(const void *input, size_t size, buffer_t *output)
316
0
{
317
0
  return uni_utf8_write_nf_common(input, size, UNICODE_NFD, output);
318
0
}
319
320
int uni_utf8_write_nfkd(const void *input, size_t size, buffer_t *output)
321
0
{
322
0
  return uni_utf8_write_nf_common(input, size, UNICODE_NFKD, output);
323
0
}
324
325
int uni_utf8_write_nfc(const void *input, size_t size, buffer_t *output)
326
1.43M
{
327
1.43M
  return uni_utf8_write_nf_common(input, size, UNICODE_NFC, output);
328
1.43M
}
329
330
int uni_utf8_write_nfkc(const void *input, size_t size, buffer_t *output)
331
0
{
332
0
  return uni_utf8_write_nf_common(input, size, UNICODE_NFKC, output);
333
0
}
334
335
int uni_utf8_to_nfd(const void *input, size_t size, const char **output_r)
336
0
{
337
0
  buffer_t *output = t_buffer_create(size);
338
339
0
  if (uni_utf8_write_nf_common(input, size, UNICODE_NFD, output) < 0)
340
0
    return -1;
341
0
  *output_r = str_c(output);
342
0
  return 0;
343
0
}
344
345
int uni_utf8_to_nfkd(const void *input, size_t size, const char **output_r)
346
0
{
347
0
  buffer_t *output = t_buffer_create(size);
348
349
0
  if (uni_utf8_write_nf_common(input, size, UNICODE_NFKD, output) < 0)
350
0
    return -1;
351
0
  *output_r = str_c(output);
352
0
  return 0;
353
0
}
354
355
int uni_utf8_to_nfc(const void *input, size_t size, const char **output_r)
356
0
{
357
0
  buffer_t *output = t_buffer_create(size);
358
359
0
  if (uni_utf8_write_nf_common(input, size, UNICODE_NFC, output) < 0)
360
0
    return -1;
361
0
  *output_r = str_c(output);
362
0
  return 0;
363
0
}
364
365
int uni_utf8_to_nfkc(const void *input, size_t size, const char **output_r)
366
0
{
367
0
  buffer_t *output = t_buffer_create(size);
368
369
0
  if (uni_utf8_write_nf_common(input, size, UNICODE_NFKC, output) < 0)
370
0
    return -1;
371
0
  *output_r = str_c(output);
372
0
  return 0;
373
0
}
374
375
static int
376
uni_utf8_is_nf(const void *_input, size_t size, enum unicode_nf_type type)
377
0
{
378
0
  static struct unicode_nf_checker unc;
379
0
  const unsigned char *input = _input;
380
0
  unichar_t chr;
381
0
  int ret;
382
383
0
  unicode_nf_checker_init(&unc, type);
384
385
0
  while (size > 0) {
386
0
    const struct unicode_code_point_data *cp_data = NULL;
387
0
    int bytes = uni_utf8_get_char_n(input, size, &chr);
388
0
    if (bytes <= 0)
389
0
      return -1;
390
0
    input += bytes;
391
0
    size -= bytes;
392
393
0
    ret = unicode_nf_checker_input(&unc, chr, &cp_data);
394
0
    if (ret <= 0)
395
0
      return ret;
396
0
  }
397
398
0
  return unicode_nf_checker_finish(&unc);
399
0
}
400
401
int uni_utf8_is_nfd(const void *input, size_t size)
402
0
{
403
0
  return uni_utf8_is_nf(input, size, UNICODE_NFD);
404
0
}
405
406
int uni_utf8_is_nfkd(const void *input, size_t size)
407
0
{
408
0
  return uni_utf8_is_nf(input, size, UNICODE_NFKD);
409
0
}
410
411
int uni_utf8_is_nfc(const void *input, size_t size)
412
0
{
413
0
  return uni_utf8_is_nf(input, size, UNICODE_NFC);
414
0
}
415
416
int uni_utf8_is_nfkc(const void *input, size_t size)
417
0
{
418
0
  return uni_utf8_is_nf(input, size, UNICODE_NFKC);
419
0
}
420
421
int uni_utf8_write_uppercase(const void *_input, size_t size, buffer_t *output)
422
0
{
423
0
  static struct unicode_casemap map;
424
0
  const char *error;
425
426
0
  unicode_casemap_init_uppercase(&map);
427
428
0
  return uni_utf8_run_transform(_input, size, &map.transform, output,
429
0
              &error);
430
0
}
431
432
int uni_utf8_write_lowercase(const void *_input, size_t size, buffer_t *output)
433
0
{
434
0
  static struct unicode_casemap map;
435
0
  const char *error;
436
437
0
  unicode_casemap_init_lowercase(&map);
438
439
0
  return uni_utf8_run_transform(_input, size, &map.transform, output,
440
0
              &error);
441
0
}
442
443
int uni_utf8_write_casefold(const void *_input, size_t size, buffer_t *output)
444
0
{
445
0
  static struct unicode_casemap map;
446
0
  const char *error;
447
448
0
  unicode_casemap_init_casefold(&map);
449
450
0
  return uni_utf8_run_transform(_input, size, &map.transform, output,
451
0
              &error);
452
0
}
453
454
int uni_utf8_to_uppercase(const void *input, size_t size, const char **output_r)
455
0
{
456
0
  buffer_t *output = t_buffer_create(size);
457
0
  int ret;
458
459
0
  ret = uni_utf8_write_uppercase(input, size, output);
460
0
  *output_r = str_c(output);
461
0
  return ret;
462
0
}
463
464
int uni_utf8_to_lowercase(const void *input, size_t size, const char **output_r)
465
0
{
466
0
  buffer_t *output = t_buffer_create(size);
467
0
  int ret;
468
469
0
  ret = uni_utf8_write_lowercase(input, size, output);
470
0
  *output_r = str_c(output);
471
0
  return ret;
472
0
}
473
474
int uni_utf8_to_casefold(const void *input, size_t size, const char **output_r)
475
0
{
476
0
  buffer_t *output = t_buffer_create(size);
477
0
  int ret;
478
479
0
  ret = uni_utf8_write_casefold(input, size, output);
480
0
  *output_r = str_c(output);
481
0
  return ret;
482
0
}
483
484
int uni_utf8_to_decomposed_titlecase(const void *_input, size_t size,
485
             buffer_t *output)
486
0
{
487
0
  struct unicode_rfc5051_context ctx;
488
0
  const unsigned char *input = _input;
489
0
  unichar_t chr;
490
0
  int ret = 0;
491
492
0
  unicode_rfc5051_init(&ctx);
493
494
0
  while (size > 0) {
495
0
    int bytes = uni_utf8_get_char_n(input, size, &chr);
496
0
    if (bytes <= 0) {
497
      /* invalid input. try the next byte. */
498
0
      ret = -1;
499
0
      input++; size--;
500
0
      output_add_replacement_char(output);
501
0
      continue;
502
0
    }
503
0
    input += bytes;
504
0
    size -= bytes;
505
506
0
    const unichar_t *norm;
507
0
    size_t norm_len;
508
509
0
    norm_len = unicode_rfc5051_normalize(&ctx, chr, &norm);
510
0
    uni_ucs4_to_utf8(norm, norm_len, output);
511
0
  }
512
0
  return ret;
513
0
}
514
515
static inline unsigned int
516
is_valid_utf8_seq(const unsigned char *input, unsigned int size)
517
333M
{
518
333M
  unichar_t chr;
519
333M
  int len = uni_utf8_get_char_n(input, size, &chr);
520
333M
  return len <= 0 ? 0 : len;
521
333M
}
522
523
static int uni_utf8_find_invalid_pos(const unsigned char *input, size_t size,
524
             size_t *pos_r)
525
3.29M
{
526
3.29M
  size_t i, len;
527
528
  /* find the first invalid utf8 sequence */
529
317M
  for (i = 0; i < size;) {
530
313M
    if (input[i] < 0x80)
531
1.38M
      i++;
532
312M
    else {
533
312M
      len = is_valid_utf8_seq(input + i, size-i);
534
312M
      if (unlikely(len == 0)) {
535
7.58k
        *pos_r = i;
536
7.58k
        return -1;
537
7.58k
      }
538
312M
      i += len;
539
312M
    }
540
313M
  }
541
3.28M
  return 0;
542
3.29M
}
543
544
bool uni_utf8_get_valid_data(const unsigned char *input, size_t size,
545
           buffer_t *buf)
546
3.24M
{
547
3.24M
  size_t i, len;
548
549
3.24M
  if (uni_utf8_find_invalid_pos(input, size, &i) == 0)
550
3.23M
    return TRUE;
551
552
  /* broken utf-8 input - skip the broken characters */
553
7.43k
  buffer_append(buf, input, i++);
554
555
7.43k
  output_add_replacement_char(buf);
556
26.3M
  while (i < size) {
557
26.3M
    if (input[i] < 0x80) {
558
5.15M
      buffer_append_c(buf, input[i++]);
559
5.15M
      continue;
560
5.15M
    }
561
562
21.2M
    len = is_valid_utf8_seq(input + i, size-i);
563
21.2M
    if (len == 0) {
564
222k
      i++;
565
222k
      output_add_replacement_char(buf);
566
222k
      continue;
567
222k
    }
568
20.9M
    buffer_append(buf, input + i, len);
569
20.9M
    i += len;
570
20.9M
  }
571
7.43k
  return FALSE;
572
3.24M
}
573
574
bool uni_utf8_str_is_valid(const char *str)
575
52.2k
{
576
52.2k
  size_t i;
577
578
52.2k
  return uni_utf8_find_invalid_pos((const unsigned char *)str,
579
52.2k
           strlen(str), &i) == 0;
580
52.2k
}
581
582
bool uni_utf8_data_is_valid(const unsigned char *data, size_t size)
583
0
{
584
0
  size_t i;
585
586
0
  return uni_utf8_find_invalid_pos(data, size, &i) == 0;
587
0
}
588
589
size_t uni_utf8_data_truncate(const unsigned char *data, size_t old_size,
590
            size_t max_new_size)
591
6.44k
{
592
6.44k
  if (max_new_size >= old_size)
593
0
    return old_size;
594
6.44k
  if (max_new_size == 0)
595
0
    return 0;
596
597
6.44k
  if ((data[max_new_size] & 0x80) == 0)
598
6.43k
    return max_new_size;
599
17
  while (max_new_size > 0 && (data[max_new_size-1] & 0xc0) == 0x80)
600
5
    max_new_size--;
601
12
  if (max_new_size > 0 && (data[max_new_size-1] & 0xc0) == 0xc0)
602
12
    max_new_size--;
603
12
  return max_new_size;
604
6.44k
}
605
606
/*
607
 * Grapheme clusters
608
 */
609
610
void uni_gc_scanner_init(struct uni_gc_scanner *gcsc,
611
       const void *input, size_t size)
612
0
{
613
0
  i_zero(gcsc);
614
0
  unicode_gc_break_init(&gcsc->gcbrk);
615
0
  gcsc->p = input;
616
0
  gcsc->pend = gcsc->p + size;
617
0
}
618
619
bool uni_gc_scan_shift(struct uni_gc_scanner *gcsc)
620
0
{
621
0
  bool first = (gcsc->poffset == NULL);
622
623
  /* Reset offset to last grapheme boundary (after the last grapheme
624
     cluster we indicated). */
625
0
  gcsc->poffset = gcsc->p;
626
  /* Shift pointer past last code point; starts the next grapheme cluster
627
     we shall compose in this call. */
628
0
  gcsc->p += gcsc->cp_size;
629
0
  gcsc->cp_size = 0;
630
0
  while (gcsc->p < gcsc->pend) {
631
    /* Decode next UTF-8 code point */
632
0
    gcsc->cp_size = uni_utf8_get_char_n(
633
0
      gcsc->p, gcsc->pend - gcsc->p, &gcsc->cp);
634
    /* We expect valid and complete UTF-8 input */
635
0
    i_assert(gcsc->cp_size > 0);
636
637
    /* Determine whether there exists a grapheme cluster boundary
638
       before this code point. */
639
0
    const struct unicode_code_point_data *cp_data = NULL;
640
0
    if (unicode_gc_break_cp(&gcsc->gcbrk, gcsc->cp, &cp_data)) {
641
      /* Yes, but ignore the very first grapheme boundary that
642
         occurs at the start of input. */
643
0
      if (!first) {
644
        /* Grapheme cluster detected, but it does *NOT*
645
           include the last code point we decoded just
646
           now. */
647
0
        i_assert(gcsc->p > gcsc->poffset);
648
0
        return TRUE;
649
0
      }
650
0
      first = FALSE;
651
0
    }
652
653
    /* Shift pointer past last code point; include this in the next
654
       grapheme cluster we shall compose in this call. */
655
0
    gcsc->p += gcsc->cp_size;
656
0
    gcsc->cp_size = 0;
657
0
  }
658
  /* Return whether there is any last remaining grapheme cluster. */
659
0
  return (gcsc->p > gcsc->poffset);
660
0
}