Coverage Report

Created: 2026-03-31 07:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libunistring/lib/uninorm/u-normalize-internal.h
Line
Count
Source
1
/* Decomposition and composition of Unicode strings.
2
   Copyright (C) 2009-2026 Free Software Foundation, Inc.
3
   Written by Bruno Haible <bruno@clisp.org>, 2009.
4
5
   This file is free software: you can redistribute it and/or modify
6
   it under the terms of the GNU Lesser General Public License as
7
   published by the Free Software Foundation; either version 2.1 of the
8
   License, or (at your option) any later version.
9
10
   This file is distributed in the hope that it will be useful,
11
   but WITHOUT ANY WARRANTY; without even the implied warranty of
12
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
   GNU Lesser General Public License for more details.
14
15
   You should have received a copy of the GNU Lesser General Public License
16
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17
18
UNIT *
19
FUNC (uninorm_t nf, const UNIT *s, size_t n,
20
      UNIT *resultbuf, size_t *lengthp)
21
197k
{
22
197k
  int (*decomposer) (ucs4_t uc, ucs4_t *decomposition) = nf->decomposer;
23
197k
  ucs4_t (*composer) (ucs4_t uc1, ucs4_t uc2) = nf->composer;
24
25
  /* The result being accumulated.  */
26
197k
  UNIT *result;
27
197k
  size_t allocated;
28
197k
  if (resultbuf == NULL)
29
197k
    {
30
197k
      result = NULL;
31
197k
      allocated = 0;
32
197k
    }
33
0
  else
34
0
    {
35
0
      result = resultbuf;
36
0
      allocated = *lengthp;
37
0
    }
38
197k
  size_t length = 0;
39
40
  /* The buffer for sorting.  */
41
197k
  #define SORTBUF_PREALLOCATED 64
42
197k
  struct ucs4_with_ccc sortbuf_preallocated[2 * SORTBUF_PREALLOCATED];
43
197k
  struct ucs4_with_ccc *sortbuf = /* array of size 2 * sortbuf_allocated */
44
197k
    sortbuf_preallocated;
45
197k
  size_t sortbuf_allocated = SORTBUF_PREALLOCATED;
46
197k
  size_t sortbuf_count = 0;
47
48
197k
  {
49
197k
    const UNIT *s_end = s + n;
50
51
197k
    for (;;)
52
2.24M
      {
53
2.24M
        int count;
54
2.24M
        ucs4_t decomposed[UC_DECOMPOSITION_MAX_LENGTH];
55
2.24M
        int decomposed_count;
56
57
2.24M
        if (s < s_end)
58
2.04M
          {
59
            /* Fetch the next character.  */
60
2.04M
            count = U_MBTOUC_UNSAFE (&decomposed[0], s, s_end - s);
61
2.04M
            decomposed_count = 1;
62
63
            /* Decompose it, recursively.
64
               It would be possible to precompute the recursive decomposition
65
               and store it in a table.  But this would significantly increase
66
               the size of the decomposition tables, because for example for
67
               U+1FC1 the recursive canonical decomposition and the recursive
68
               compatibility decomposition are different.  */
69
4.36M
            for (int curr = 0; curr < decomposed_count; )
70
2.31M
              {
71
                /* Invariant: decomposed[0..curr-1] is fully decomposed, i.e.
72
                   all elements are atomic.  */
73
2.31M
                ucs4_t curr_decomposed[UC_DECOMPOSITION_MAX_LENGTH];
74
2.31M
                int curr_decomposed_count;
75
76
2.31M
                curr_decomposed_count = decomposer (decomposed[curr], curr_decomposed);
77
2.31M
                if (curr_decomposed_count >= 0)
78
136k
                  {
79
                    /* Move curr_decomposed[0..curr_decomposed_count-1] over
80
                       decomposed[curr], making room.  It's not worth using
81
                       memcpy() here, since the counts are so small.  */
82
136k
                    int shift = curr_decomposed_count - 1;
83
84
136k
                    if (shift < 0)
85
0
                      abort ();
86
136k
                    if (shift > 0)
87
134k
                      {
88
134k
                        decomposed_count += shift;
89
134k
                        if (decomposed_count > UC_DECOMPOSITION_MAX_LENGTH)
90
0
                          abort ();
91
147k
                        for (int j = decomposed_count - 1 - shift; j > curr; j--)
92
13.2k
                          decomposed[j + shift] = decomposed[j];
93
134k
                      }
94
406k
                    for (; shift >= 0; shift--)
95
270k
                      decomposed[curr + shift] = curr_decomposed[shift];
96
136k
                  }
97
2.18M
                else
98
2.18M
                  {
99
                    /* decomposed[curr] is atomic.  */
100
2.18M
                    curr++;
101
2.18M
                  }
102
2.31M
              }
103
2.04M
          }
104
197k
        else
105
197k
          {
106
197k
            count = 0;
107
197k
            decomposed_count = 0;
108
197k
          }
109
110
2.24M
        int i = 0;
111
2.24M
        for (;;)
112
4.42M
          {
113
4.42M
            ucs4_t uc;
114
4.42M
            int ccc;
115
116
4.42M
            if (s < s_end)
117
4.22M
              {
118
                /* Fetch the next character from the decomposition.  */
119
4.22M
                if (i == decomposed_count)
120
2.04M
                  break;
121
2.18M
                uc = decomposed[i];
122
2.18M
                ccc = uc_combining_class (uc);
123
2.18M
              }
124
197k
            else
125
197k
              {
126
                /* End of string reached.  */
127
197k
                uc = 0;
128
197k
                ccc = 0;
129
197k
              }
130
131
2.37M
            if (ccc == 0)
132
2.07M
              {
133
                /* Apply the canonical ordering algorithm to the accumulated
134
                   sequence of characters.  */
135
2.07M
                if (sortbuf_count > 1)
136
114k
                  gl_uninorm_decompose_merge_sort_inplace (sortbuf, sortbuf_count,
137
114k
                                                           sortbuf + sortbuf_count);
138
139
2.07M
                if (composer != NULL)
140
2.07M
                  {
141
                    /* Attempt to combine decomposed characters, as specified
142
                       in the Unicode Standard Annex #15 "Unicode Normalization
143
                       Forms".  We need to check
144
                         1. whether the first accumulated character is a
145
                            "starter" (i.e. has ccc = 0).  This is usually the
146
                            case.  But when the string starts with a
147
                            non-starter, the sortbuf also starts with a
148
                            non-starter.  Btw, this check could also be
149
                            omitted, because the composition table has only
150
                            entries (code1, code2) for which code1 is a
151
                            starter; if the first accumulated character is not
152
                            a starter, no lookup will succeed.
153
                         2. If the sortbuf has more than one character, check
154
                            for each of these characters that are not "blocked"
155
                            from the starter (i.e. have a ccc that is higher
156
                            than the ccc of the previous character) whether it
157
                            can be combined with the first character.
158
                         3. If only one character is left in sortbuf, check
159
                            whether it can be combined with the next character
160
                            (also a starter).  */
161
2.07M
                    if (sortbuf_count > 0 && sortbuf[0].ccc == 0)
162
1.87M
                      {
163
2.11M
                        for (size_t j = 1; j < sortbuf_count; )
164
240k
                          {
165
240k
                            if (sortbuf[j].ccc > sortbuf[j - 1].ccc)
166
132k
                              {
167
132k
                                ucs4_t combined =
168
132k
                                  composer (sortbuf[0].code, sortbuf[j].code);
169
132k
                                if (combined)
170
103k
                                  {
171
103k
                                    sortbuf[0].code = combined;
172
                                    /* sortbuf[0].ccc = 0, still valid.  */
173
212k
                                    for (size_t k = j + 1; k < sortbuf_count; k++)
174
108k
                                      sortbuf[k - 1] = sortbuf[k];
175
103k
                                    sortbuf_count--;
176
103k
                                    continue;
177
103k
                                  }
178
132k
                              }
179
137k
                            j++;
180
137k
                          }
181
1.87M
                        if (s < s_end && sortbuf_count == 1)
182
1.66M
                          {
183
1.66M
                            ucs4_t combined =
184
1.66M
                              composer (sortbuf[0].code, uc);
185
1.66M
                            if (combined)
186
7.82k
                              {
187
7.82k
                                uc = combined;
188
7.82k
                                ccc = 0;
189
                                /* uc could be further combined with subsequent
190
                                   characters.  So don't put it into sortbuf[0] in
191
                                   this round, only in the next round.  */
192
7.82k
                                sortbuf_count = 0;
193
7.82k
                              }
194
1.66M
                          }
195
1.87M
                      }
196
2.07M
                  }
197
198
4.14M
                for (size_t j = 0; j < sortbuf_count; j++)
199
2.06M
                  {
200
2.06M
                    ucs4_t muc = sortbuf[j].code;
201
202
                    /* Append muc to the result accumulator.  */
203
2.06M
                    if (length < allocated)
204
1.86M
                      {
205
1.86M
                        int ret =
206
1.86M
                          U_UCTOMB (result + length, muc, allocated - length);
207
1.86M
                        if (ret == -1)
208
0
                          {
209
0
                            errno = EINVAL;
210
0
                            goto fail;
211
0
                          }
212
1.86M
                        if (ret >= 0)
213
1.86M
                          {
214
1.86M
                            length += ret;
215
1.86M
                            goto done_appending;
216
1.86M
                          }
217
1.86M
                      }
218
201k
                    {
219
201k
                      size_t old_allocated = allocated;
220
201k
                      size_t new_allocated = 2 * old_allocated;
221
201k
                      if (new_allocated < 64)
222
194k
                        new_allocated = 64;
223
201k
                      if (new_allocated < old_allocated) /* integer overflow? */
224
0
                        abort ();
225
201k
                      {
226
201k
                        UNIT *larger_result;
227
201k
                        if (result == NULL)
228
194k
                          {
229
194k
                            larger_result =
230
194k
                              (UNIT *) malloc (new_allocated * sizeof (UNIT));
231
194k
                            if (larger_result == NULL)
232
0
                              {
233
0
                                errno = ENOMEM;
234
0
                                goto fail;
235
0
                              }
236
194k
                          }
237
7.25k
                        else if (result == resultbuf)
238
0
                          {
239
0
                            larger_result =
240
0
                              (UNIT *) malloc (new_allocated * sizeof (UNIT));
241
0
                            if (larger_result == NULL)
242
0
                              {
243
0
                                errno = ENOMEM;
244
0
                                goto fail;
245
0
                              }
246
0
                            U_CPY (larger_result, resultbuf, length);
247
0
                          }
248
7.25k
                        else
249
7.25k
                          {
250
7.25k
                            larger_result =
251
7.25k
                              (UNIT *) realloc (result, new_allocated * sizeof (UNIT));
252
7.25k
                            if (larger_result == NULL)
253
0
                              {
254
0
                                errno = ENOMEM;
255
0
                                goto fail;
256
0
                              }
257
7.25k
                          }
258
201k
                        result = larger_result;
259
201k
                        allocated = new_allocated;
260
201k
                        {
261
201k
                          int ret =
262
201k
                            U_UCTOMB (result + length, muc, allocated - length);
263
201k
                          if (ret == -1)
264
0
                            {
265
0
                              errno = EINVAL;
266
0
                              goto fail;
267
0
                            }
268
201k
                          if (ret < 0)
269
0
                            abort ();
270
201k
                          length += ret;
271
201k
                          goto done_appending;
272
201k
                        }
273
201k
                      }
274
201k
                    }
275
2.06M
                   done_appending: ;
276
2.06M
                  }
277
278
                /* sortbuf is now empty.  */
279
2.07M
                sortbuf_count = 0;
280
2.07M
              }
281
282
2.37M
            if (!(s < s_end))
283
              /* End of string reached.  */
284
197k
              break;
285
286
            /* Append (uc, ccc) to sortbuf.  */
287
2.18M
            if (sortbuf_count == sortbuf_allocated)
288
1.42k
              {
289
1.42k
                sortbuf_allocated = 2 * sortbuf_allocated;
290
1.42k
                if (sortbuf_allocated < sortbuf_count) /* integer overflow? */
291
0
                  abort ();
292
1.42k
                struct ucs4_with_ccc *new_sortbuf =
293
1.42k
                  (struct ucs4_with_ccc *) malloc (2 * sortbuf_allocated * sizeof (struct ucs4_with_ccc));
294
1.42k
                if (new_sortbuf == NULL)
295
0
                  {
296
0
                    errno = ENOMEM;
297
0
                    goto fail;
298
0
                  }
299
1.42k
                memcpy (new_sortbuf, sortbuf,
300
1.42k
                        sortbuf_count * sizeof (struct ucs4_with_ccc));
301
1.42k
                if (sortbuf != sortbuf_preallocated)
302
1.42k
                  free (sortbuf);
303
1.42k
                sortbuf = new_sortbuf;
304
1.42k
              }
305
2.18M
            sortbuf[sortbuf_count].code = uc;
306
2.18M
            sortbuf[sortbuf_count].ccc = ccc;
307
2.18M
            sortbuf_count++;
308
309
2.18M
            i++;
310
2.18M
          }
311
312
2.24M
        if (!(s < s_end))
313
          /* End of string reached.  */
314
197k
          break;
315
316
2.04M
        s += count;
317
2.04M
      }
318
197k
  }
319
320
197k
  if (length == 0)
321
2.63k
    {
322
2.63k
      if (result == NULL)
323
2.63k
        {
324
          /* Return a non-NULL value.  NULL means error.  */
325
2.63k
          result = (UNIT *) malloc (1);
326
2.63k
          if (result == NULL)
327
0
            {
328
0
              errno = ENOMEM;
329
0
              goto fail;
330
0
            }
331
2.63k
        }
332
2.63k
    }
333
194k
  else if (result != resultbuf && length < allocated)
334
192k
    {
335
      /* Shrink the allocated memory if possible.  */
336
192k
      UNIT *memory = (UNIT *) realloc (result, length * sizeof (UNIT));
337
192k
      if (memory != NULL)
338
192k
        result = memory;
339
192k
    }
340
341
197k
  if (sortbuf_count > 0)
342
0
    abort ();
343
197k
  if (sortbuf != sortbuf_preallocated)
344
197k
    free (sortbuf);
345
346
197k
  *lengthp = length;
347
197k
  return result;
348
349
0
 fail:
350
0
  {
351
0
    int saved_errno = errno;
352
0
    if (sortbuf != sortbuf_preallocated)
353
0
      free (sortbuf);
354
0
    if (result != resultbuf)
355
0
      free (result);
356
0
    errno = saved_errno;
357
0
  }
358
  return NULL;
359
197k
}
Unexecuted instantiation: u8_normalize
u32_normalize
Line
Count
Source
21
197k
{
22
197k
  int (*decomposer) (ucs4_t uc, ucs4_t *decomposition) = nf->decomposer;
23
197k
  ucs4_t (*composer) (ucs4_t uc1, ucs4_t uc2) = nf->composer;
24
25
  /* The result being accumulated.  */
26
197k
  UNIT *result;
27
197k
  size_t allocated;
28
197k
  if (resultbuf == NULL)
29
197k
    {
30
197k
      result = NULL;
31
197k
      allocated = 0;
32
197k
    }
33
0
  else
34
0
    {
35
0
      result = resultbuf;
36
0
      allocated = *lengthp;
37
0
    }
38
197k
  size_t length = 0;
39
40
  /* The buffer for sorting.  */
41
197k
  #define SORTBUF_PREALLOCATED 64
42
197k
  struct ucs4_with_ccc sortbuf_preallocated[2 * SORTBUF_PREALLOCATED];
43
197k
  struct ucs4_with_ccc *sortbuf = /* array of size 2 * sortbuf_allocated */
44
197k
    sortbuf_preallocated;
45
197k
  size_t sortbuf_allocated = SORTBUF_PREALLOCATED;
46
197k
  size_t sortbuf_count = 0;
47
48
197k
  {
49
197k
    const UNIT *s_end = s + n;
50
51
197k
    for (;;)
52
2.24M
      {
53
2.24M
        int count;
54
2.24M
        ucs4_t decomposed[UC_DECOMPOSITION_MAX_LENGTH];
55
2.24M
        int decomposed_count;
56
57
2.24M
        if (s < s_end)
58
2.04M
          {
59
            /* Fetch the next character.  */
60
2.04M
            count = U_MBTOUC_UNSAFE (&decomposed[0], s, s_end - s);
61
2.04M
            decomposed_count = 1;
62
63
            /* Decompose it, recursively.
64
               It would be possible to precompute the recursive decomposition
65
               and store it in a table.  But this would significantly increase
66
               the size of the decomposition tables, because for example for
67
               U+1FC1 the recursive canonical decomposition and the recursive
68
               compatibility decomposition are different.  */
69
4.36M
            for (int curr = 0; curr < decomposed_count; )
70
2.31M
              {
71
                /* Invariant: decomposed[0..curr-1] is fully decomposed, i.e.
72
                   all elements are atomic.  */
73
2.31M
                ucs4_t curr_decomposed[UC_DECOMPOSITION_MAX_LENGTH];
74
2.31M
                int curr_decomposed_count;
75
76
2.31M
                curr_decomposed_count = decomposer (decomposed[curr], curr_decomposed);
77
2.31M
                if (curr_decomposed_count >= 0)
78
136k
                  {
79
                    /* Move curr_decomposed[0..curr_decomposed_count-1] over
80
                       decomposed[curr], making room.  It's not worth using
81
                       memcpy() here, since the counts are so small.  */
82
136k
                    int shift = curr_decomposed_count - 1;
83
84
136k
                    if (shift < 0)
85
0
                      abort ();
86
136k
                    if (shift > 0)
87
134k
                      {
88
134k
                        decomposed_count += shift;
89
134k
                        if (decomposed_count > UC_DECOMPOSITION_MAX_LENGTH)
90
0
                          abort ();
91
147k
                        for (int j = decomposed_count - 1 - shift; j > curr; j--)
92
13.2k
                          decomposed[j + shift] = decomposed[j];
93
134k
                      }
94
406k
                    for (; shift >= 0; shift--)
95
270k
                      decomposed[curr + shift] = curr_decomposed[shift];
96
136k
                  }
97
2.18M
                else
98
2.18M
                  {
99
                    /* decomposed[curr] is atomic.  */
100
2.18M
                    curr++;
101
2.18M
                  }
102
2.31M
              }
103
2.04M
          }
104
197k
        else
105
197k
          {
106
197k
            count = 0;
107
197k
            decomposed_count = 0;
108
197k
          }
109
110
2.24M
        int i = 0;
111
2.24M
        for (;;)
112
4.42M
          {
113
4.42M
            ucs4_t uc;
114
4.42M
            int ccc;
115
116
4.42M
            if (s < s_end)
117
4.22M
              {
118
                /* Fetch the next character from the decomposition.  */
119
4.22M
                if (i == decomposed_count)
120
2.04M
                  break;
121
2.18M
                uc = decomposed[i];
122
2.18M
                ccc = uc_combining_class (uc);
123
2.18M
              }
124
197k
            else
125
197k
              {
126
                /* End of string reached.  */
127
197k
                uc = 0;
128
197k
                ccc = 0;
129
197k
              }
130
131
2.37M
            if (ccc == 0)
132
2.07M
              {
133
                /* Apply the canonical ordering algorithm to the accumulated
134
                   sequence of characters.  */
135
2.07M
                if (sortbuf_count > 1)
136
114k
                  gl_uninorm_decompose_merge_sort_inplace (sortbuf, sortbuf_count,
137
114k
                                                           sortbuf + sortbuf_count);
138
139
2.07M
                if (composer != NULL)
140
2.07M
                  {
141
                    /* Attempt to combine decomposed characters, as specified
142
                       in the Unicode Standard Annex #15 "Unicode Normalization
143
                       Forms".  We need to check
144
                         1. whether the first accumulated character is a
145
                            "starter" (i.e. has ccc = 0).  This is usually the
146
                            case.  But when the string starts with a
147
                            non-starter, the sortbuf also starts with a
148
                            non-starter.  Btw, this check could also be
149
                            omitted, because the composition table has only
150
                            entries (code1, code2) for which code1 is a
151
                            starter; if the first accumulated character is not
152
                            a starter, no lookup will succeed.
153
                         2. If the sortbuf has more than one character, check
154
                            for each of these characters that are not "blocked"
155
                            from the starter (i.e. have a ccc that is higher
156
                            than the ccc of the previous character) whether it
157
                            can be combined with the first character.
158
                         3. If only one character is left in sortbuf, check
159
                            whether it can be combined with the next character
160
                            (also a starter).  */
161
2.07M
                    if (sortbuf_count > 0 && sortbuf[0].ccc == 0)
162
1.87M
                      {
163
2.11M
                        for (size_t j = 1; j < sortbuf_count; )
164
240k
                          {
165
240k
                            if (sortbuf[j].ccc > sortbuf[j - 1].ccc)
166
132k
                              {
167
132k
                                ucs4_t combined =
168
132k
                                  composer (sortbuf[0].code, sortbuf[j].code);
169
132k
                                if (combined)
170
103k
                                  {
171
103k
                                    sortbuf[0].code = combined;
172
                                    /* sortbuf[0].ccc = 0, still valid.  */
173
212k
                                    for (size_t k = j + 1; k < sortbuf_count; k++)
174
108k
                                      sortbuf[k - 1] = sortbuf[k];
175
103k
                                    sortbuf_count--;
176
103k
                                    continue;
177
103k
                                  }
178
132k
                              }
179
137k
                            j++;
180
137k
                          }
181
1.87M
                        if (s < s_end && sortbuf_count == 1)
182
1.66M
                          {
183
1.66M
                            ucs4_t combined =
184
1.66M
                              composer (sortbuf[0].code, uc);
185
1.66M
                            if (combined)
186
7.82k
                              {
187
7.82k
                                uc = combined;
188
7.82k
                                ccc = 0;
189
                                /* uc could be further combined with subsequent
190
                                   characters.  So don't put it into sortbuf[0] in
191
                                   this round, only in the next round.  */
192
7.82k
                                sortbuf_count = 0;
193
7.82k
                              }
194
1.66M
                          }
195
1.87M
                      }
196
2.07M
                  }
197
198
4.14M
                for (size_t j = 0; j < sortbuf_count; j++)
199
2.06M
                  {
200
2.06M
                    ucs4_t muc = sortbuf[j].code;
201
202
                    /* Append muc to the result accumulator.  */
203
2.06M
                    if (length < allocated)
204
1.86M
                      {
205
1.86M
                        int ret =
206
1.86M
                          U_UCTOMB (result + length, muc, allocated - length);
207
1.86M
                        if (ret == -1)
208
0
                          {
209
0
                            errno = EINVAL;
210
0
                            goto fail;
211
0
                          }
212
1.86M
                        if (ret >= 0)
213
1.86M
                          {
214
1.86M
                            length += ret;
215
1.86M
                            goto done_appending;
216
1.86M
                          }
217
1.86M
                      }
218
201k
                    {
219
201k
                      size_t old_allocated = allocated;
220
201k
                      size_t new_allocated = 2 * old_allocated;
221
201k
                      if (new_allocated < 64)
222
194k
                        new_allocated = 64;
223
201k
                      if (new_allocated < old_allocated) /* integer overflow? */
224
0
                        abort ();
225
201k
                      {
226
201k
                        UNIT *larger_result;
227
201k
                        if (result == NULL)
228
194k
                          {
229
194k
                            larger_result =
230
194k
                              (UNIT *) malloc (new_allocated * sizeof (UNIT));
231
194k
                            if (larger_result == NULL)
232
0
                              {
233
0
                                errno = ENOMEM;
234
0
                                goto fail;
235
0
                              }
236
194k
                          }
237
7.25k
                        else if (result == resultbuf)
238
0
                          {
239
0
                            larger_result =
240
0
                              (UNIT *) malloc (new_allocated * sizeof (UNIT));
241
0
                            if (larger_result == NULL)
242
0
                              {
243
0
                                errno = ENOMEM;
244
0
                                goto fail;
245
0
                              }
246
0
                            U_CPY (larger_result, resultbuf, length);
247
0
                          }
248
7.25k
                        else
249
7.25k
                          {
250
7.25k
                            larger_result =
251
7.25k
                              (UNIT *) realloc (result, new_allocated * sizeof (UNIT));
252
7.25k
                            if (larger_result == NULL)
253
0
                              {
254
0
                                errno = ENOMEM;
255
0
                                goto fail;
256
0
                              }
257
7.25k
                          }
258
201k
                        result = larger_result;
259
201k
                        allocated = new_allocated;
260
201k
                        {
261
201k
                          int ret =
262
201k
                            U_UCTOMB (result + length, muc, allocated - length);
263
201k
                          if (ret == -1)
264
0
                            {
265
0
                              errno = EINVAL;
266
0
                              goto fail;
267
0
                            }
268
201k
                          if (ret < 0)
269
0
                            abort ();
270
201k
                          length += ret;
271
201k
                          goto done_appending;
272
201k
                        }
273
201k
                      }
274
201k
                    }
275
2.06M
                   done_appending: ;
276
2.06M
                  }
277
278
                /* sortbuf is now empty.  */
279
2.07M
                sortbuf_count = 0;
280
2.07M
              }
281
282
2.37M
            if (!(s < s_end))
283
              /* End of string reached.  */
284
197k
              break;
285
286
            /* Append (uc, ccc) to sortbuf.  */
287
2.18M
            if (sortbuf_count == sortbuf_allocated)
288
1.42k
              {
289
1.42k
                sortbuf_allocated = 2 * sortbuf_allocated;
290
1.42k
                if (sortbuf_allocated < sortbuf_count) /* integer overflow? */
291
0
                  abort ();
292
1.42k
                struct ucs4_with_ccc *new_sortbuf =
293
1.42k
                  (struct ucs4_with_ccc *) malloc (2 * sortbuf_allocated * sizeof (struct ucs4_with_ccc));
294
1.42k
                if (new_sortbuf == NULL)
295
0
                  {
296
0
                    errno = ENOMEM;
297
0
                    goto fail;
298
0
                  }
299
1.42k
                memcpy (new_sortbuf, sortbuf,
300
1.42k
                        sortbuf_count * sizeof (struct ucs4_with_ccc));
301
1.42k
                if (sortbuf != sortbuf_preallocated)
302
1.42k
                  free (sortbuf);
303
1.42k
                sortbuf = new_sortbuf;
304
1.42k
              }
305
2.18M
            sortbuf[sortbuf_count].code = uc;
306
2.18M
            sortbuf[sortbuf_count].ccc = ccc;
307
2.18M
            sortbuf_count++;
308
309
2.18M
            i++;
310
2.18M
          }
311
312
2.24M
        if (!(s < s_end))
313
          /* End of string reached.  */
314
197k
          break;
315
316
2.04M
        s += count;
317
2.04M
      }
318
197k
  }
319
320
197k
  if (length == 0)
321
2.63k
    {
322
2.63k
      if (result == NULL)
323
2.63k
        {
324
          /* Return a non-NULL value.  NULL means error.  */
325
2.63k
          result = (UNIT *) malloc (1);
326
2.63k
          if (result == NULL)
327
0
            {
328
0
              errno = ENOMEM;
329
0
              goto fail;
330
0
            }
331
2.63k
        }
332
2.63k
    }
333
194k
  else if (result != resultbuf && length < allocated)
334
192k
    {
335
      /* Shrink the allocated memory if possible.  */
336
192k
      UNIT *memory = (UNIT *) realloc (result, length * sizeof (UNIT));
337
192k
      if (memory != NULL)
338
192k
        result = memory;
339
192k
    }
340
341
197k
  if (sortbuf_count > 0)
342
0
    abort ();
343
197k
  if (sortbuf != sortbuf_preallocated)
344
197k
    free (sortbuf);
345
346
197k
  *lengthp = length;
347
197k
  return result;
348
349
0
 fail:
350
0
  {
351
0
    int saved_errno = errno;
352
0
    if (sortbuf != sortbuf_preallocated)
353
0
      free (sortbuf);
354
0
    if (result != resultbuf)
355
0
      free (result);
356
0
    errno = saved_errno;
357
0
  }
358
  return NULL;
359
197k
}