Coverage Report

Created: 2024-02-25 07:20

/src/libewf/libfvalue/libfvalue_utf8_string.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * UTF-8 string value functions
3
 *
4
 * Copyright (C) 2010-2024, Joachim Metz <joachim.metz@gmail.com>
5
 *
6
 * Refer to AUTHORS for acknowledgements.
7
 *
8
 * This program is free software: you can redistribute it and/or modify
9
 * it under the terms of the GNU Lesser General Public License as published by
10
 * the Free Software Foundation, either version 3 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * This program is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public License
19
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
20
 */
21
22
#include <common.h>
23
#include <types.h>
24
25
#include "libfvalue_libcerror.h"
26
#include "libfvalue_split_utf8_string.h"
27
#include "libfvalue_types.h"
28
29
/* Splits an UTF-8 string
30
 * Returns 1 if successful or -1 on error
31
 */
32
int libfvalue_utf8_string_split(
33
     const uint8_t *utf8_string,
34
     size_t utf8_string_size,
35
     uint8_t delimiter,
36
     libfvalue_split_utf8_string_t **split_string,
37
     libcerror_error_t **error )
38
3.36k
{
39
3.36k
  uint8_t *segment_end      = NULL;
40
3.36k
  uint8_t *segment_start    = NULL;
41
3.36k
  const uint8_t *string_end = NULL;
42
3.36k
  static char *function     = "libfvalue_utf8_string_split";
43
3.36k
  size_t string_size        = 0;
44
3.36k
  ssize_t segment_length    = 0;
45
3.36k
  int number_of_segments    = 0;
46
3.36k
  int segment_index         = 0;
47
48
3.36k
  if( utf8_string == NULL )
49
0
  {
50
0
    libcerror_error_set(
51
0
     error,
52
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
53
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
54
0
     "%s: invalid UTF-8 string.",
55
0
     function );
56
57
0
    return( -1 );
58
0
  }
59
3.36k
  if( utf8_string_size > (size_t) SSIZE_MAX )
60
0
  {
61
0
    libcerror_error_set(
62
0
     error,
63
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
64
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
65
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
66
0
     function );
67
68
0
    return( -1 );
69
0
  }
70
3.36k
  if( split_string == NULL )
71
0
  {
72
0
    libcerror_error_set(
73
0
     error,
74
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
75
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
76
0
     "%s: invalid split string.",
77
0
     function );
78
79
0
    return( -1 );
80
0
  }
81
3.36k
  if( *split_string != NULL )
82
0
  {
83
0
    libcerror_error_set(
84
0
     error,
85
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
86
0
     LIBCERROR_RUNTIME_ERROR_VALUE_ALREADY_SET,
87
0
     "%s: invalid split string already set.",
88
0
     function );
89
90
0
    return( -1 );
91
0
  }
92
  /* An empty string has no segments
93
   */
94
3.36k
  if( ( utf8_string_size == 0 )
95
3.36k
   || ( utf8_string[ 0 ] == 0 ) )
96
8
  {
97
8
    return( 1 );
98
8
  }
99
3.35k
  if( utf8_string[ utf8_string_size - 1 ] == 0 )
100
3.35k
  {
101
3.35k
    utf8_string_size--;
102
3.35k
  }
103
  /* Determine the number of segments
104
   */
105
3.35k
  segment_start = (uint8_t *) utf8_string;
106
3.35k
  string_end    = utf8_string + utf8_string_size;
107
108
3.35k
  do
109
271k
  {
110
271k
    segment_end = segment_start;
111
112
5.61M
    while( segment_end <= string_end )
113
5.61M
    {
114
5.61M
      if( ( segment_end == string_end )
115
5.61M
       || ( *segment_end == 0 ) )
116
3.35k
      {
117
3.35k
        segment_end = NULL;
118
119
3.35k
        break;
120
3.35k
      }
121
5.60M
      else if( *segment_end == delimiter )
122
268k
      {
123
268k
        break;
124
268k
      }
125
5.34M
      segment_end++;
126
5.34M
    }
127
271k
    if( segment_end > string_end )
128
0
    {
129
0
      break;
130
0
    }
131
271k
    segment_index++;
132
133
271k
    if( segment_end == NULL )
134
3.35k
    {
135
3.35k
      break;
136
3.35k
    }
137
268k
    if( segment_end == segment_start )
138
7.60k
    {
139
7.60k
      segment_start++;
140
7.60k
    }
141
260k
    else if( segment_end != utf8_string )
142
260k
    {
143
260k
      segment_start = segment_end + 1;
144
260k
    }
145
268k
  }
146
268k
  while( segment_end != NULL );
147
148
0
  number_of_segments = segment_index;
149
150
3.35k
  if( libfvalue_split_utf8_string_initialize(
151
3.35k
       split_string,
152
3.35k
       utf8_string,
153
3.35k
       utf8_string_size + 1,
154
3.35k
       number_of_segments,
155
3.35k
       error ) != 1 )
156
0
  {
157
0
    libcerror_error_set(
158
0
     error,
159
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
160
0
     LIBCERROR_RUNTIME_ERROR_INITIALIZE_FAILED,
161
0
     "%s: unable to initialize split string.",
162
0
     function );
163
164
0
    goto on_error;
165
0
  }
166
3.35k
  if( *split_string == NULL )
167
0
  {
168
0
    libcerror_error_set(
169
0
     error,
170
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
171
0
     LIBCERROR_RUNTIME_ERROR_VALUE_MISSING,
172
0
     "%s: missing split string.",
173
0
     function );
174
175
0
    goto on_error;
176
0
  }
177
  /* Do not bother splitting empty strings
178
   */
179
3.35k
  if( number_of_segments == 0 )
180
0
  {
181
0
    return( 1 );
182
0
  }
183
  /* Determine the segments
184
   * empty segments are stored as strings only containing the end of character
185
   */
186
3.35k
  if( libfvalue_split_utf8_string_get_string(
187
3.35k
       *split_string,
188
3.35k
       &segment_start,
189
3.35k
       &string_size,
190
3.35k
       error ) != 1 )
191
0
  {
192
0
    libcerror_error_set(
193
0
     error,
194
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
195
0
     LIBCERROR_RUNTIME_ERROR_GET_FAILED,
196
0
     "%s: unable to retrieve split UTF-8 string.",
197
0
     function );
198
199
0
    goto on_error;
200
0
  }
201
3.35k
  if( segment_start == NULL )
202
0
  {
203
0
    libcerror_error_set(
204
0
     error,
205
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
206
0
     LIBCERROR_RUNTIME_ERROR_VALUE_MISSING,
207
0
     "%s: missing segment start.",
208
0
     function );
209
210
0
    goto on_error;
211
0
  }
212
3.35k
  if( string_size < 1 )
213
0
  {
214
0
    libcerror_error_set(
215
0
     error,
216
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
217
0
     LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
218
0
     "%s: invalid string size value out of bounds.",
219
0
     function );
220
221
0
    goto on_error;
222
0
  }
223
3.35k
  string_end = segment_start + utf8_string_size;
224
225
3.35k
  for( segment_index = 0;
226
271k
       segment_index < number_of_segments;
227
268k
       segment_index++ )
228
271k
  {
229
271k
    segment_end = segment_start;
230
231
5.61M
    while( segment_end <= string_end )
232
5.61M
    {
233
5.61M
      if( ( segment_end == string_end )
234
5.61M
       || ( *segment_end == 0 ) )
235
3.35k
      {
236
3.35k
        segment_end = NULL;
237
238
3.35k
        break;
239
3.35k
      }
240
5.60M
      else if( *segment_end == delimiter )
241
268k
      {
242
268k
        break;
243
268k
      }
244
5.34M
      segment_end++;
245
5.34M
    }
246
271k
    if( segment_end == NULL )
247
3.35k
    {
248
3.35k
      segment_length = (ssize_t) ( string_end - segment_start );
249
3.35k
    }
250
268k
    else
251
268k
    {
252
268k
      segment_length = (ssize_t) ( segment_end - segment_start );
253
268k
    }
254
271k
    if( segment_length >= 0 )
255
271k
    {
256
271k
      segment_start[ segment_length ] = 0;
257
258
271k
      if( libfvalue_split_utf8_string_set_segment_by_index(
259
271k
           *split_string,
260
271k
           segment_index,
261
271k
           segment_start,
262
271k
           segment_length + 1,
263
271k
           error ) != 1 )
264
0
      {
265
0
        libcerror_error_set(
266
0
         error,
267
0
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
268
0
         LIBCERROR_RUNTIME_ERROR_SET_FAILED,
269
0
         "%s: unable to set split UTF-8 string segment: %d.",
270
0
         function,
271
0
         segment_index );
272
273
0
        goto on_error;
274
0
      }
275
271k
    }
276
271k
    if( segment_end == NULL )
277
3.35k
    {
278
3.35k
      break;
279
3.35k
    }
280
268k
    if( segment_end == string_end )
281
0
    {
282
0
      segment_start++;
283
0
    }
284
268k
    if( segment_end != string_end )
285
268k
    {
286
268k
      segment_start = segment_end + 1;
287
268k
    }
288
268k
  }
289
3.35k
  return( 1 );
290
291
0
on_error:
292
0
  if( *split_string != NULL )
293
0
  {
294
0
    libfvalue_split_utf8_string_free(
295
0
     split_string,
296
0
     NULL );
297
0
  }
298
0
  return( -1 );
299
3.35k
}
300