Coverage Report

Created: 2023-06-07 06:53

/src/libewf/libfvalue/libfvalue_utf8_string.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * UTF-8 string value functions
3
 *
4
 * Copyright (C) 2010-2022, Joachim Metz <joachim.metz@gmail.com>
5
 *
6
 * Refer to AUTHORS for acknowledgements.
7
 *
8
 * This program is free software: you can redistribute it and/or modify
9
 * it under the terms of the GNU Lesser General Public License as published by
10
 * the Free Software Foundation, either version 3 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * This program is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public License
19
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
20
 */
21
22
#include <common.h>
23
#include <types.h>
24
25
#include "libfvalue_libcerror.h"
26
#include "libfvalue_split_utf8_string.h"
27
#include "libfvalue_types.h"
28
29
/* Splits an UTF-8 string
30
 * Returns 1 if successful or -1 on error
31
 */
32
int libfvalue_utf8_string_split(
33
     const uint8_t *utf8_string,
34
     size_t utf8_string_size,
35
     uint8_t delimiter,
36
     libfvalue_split_utf8_string_t **split_string,
37
     libcerror_error_t **error )
38
2.97k
{
39
2.97k
  uint8_t *segment_start = NULL;
40
2.97k
  uint8_t *segment_end   = NULL;
41
2.97k
  uint8_t *string_end    = NULL;
42
2.97k
  static char *function  = "libfvalue_utf8_string_split";
43
2.97k
  size_t string_size     = 0;
44
2.97k
  ssize_t segment_length = 0;
45
2.97k
  int number_of_segments = 0;
46
2.97k
  int segment_index      = 0;
47
48
2.97k
  if( utf8_string == NULL )
49
0
  {
50
0
    libcerror_error_set(
51
0
     error,
52
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
53
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
54
0
     "%s: invalid UTF-8 string.",
55
0
     function );
56
57
0
    return( -1 );
58
0
  }
59
2.97k
  if( utf8_string_size > (size_t) SSIZE_MAX )
60
0
  {
61
0
    libcerror_error_set(
62
0
     error,
63
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
64
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
65
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
66
0
     function );
67
68
0
    return( -1 );
69
0
  }
70
2.97k
  if( split_string == NULL )
71
0
  {
72
0
    libcerror_error_set(
73
0
     error,
74
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
75
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
76
0
     "%s: invalid split string.",
77
0
     function );
78
79
0
    return( -1 );
80
0
  }
81
2.97k
  if( *split_string != NULL )
82
0
  {
83
0
    libcerror_error_set(
84
0
     error,
85
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
86
0
     LIBCERROR_RUNTIME_ERROR_VALUE_ALREADY_SET,
87
0
     "%s: invalid split string already set.",
88
0
     function );
89
90
0
    return( -1 );
91
0
  }
92
  /* An empty string has no segments
93
   */
94
2.97k
  if( ( utf8_string_size == 0 )
95
2.97k
   || ( utf8_string[ 0 ] == 0 ) )
96
8
  {
97
8
    return( 1 );
98
8
  }
99
  /* Determine the number of segments
100
   */
101
2.96k
  segment_start = (uint8_t *) utf8_string;
102
2.96k
  string_end    = (uint8_t *) &( utf8_string[ utf8_string_size - 1 ] );
103
104
2.96k
  do
105
268k
  {
106
268k
    segment_end = segment_start;
107
108
5.59M
    while( segment_end <= string_end )
109
5.59M
    {
110
5.59M
      if( ( segment_end == string_end )
111
5.59M
       || ( *segment_end == 0 ) )
112
2.96k
      {
113
2.96k
        segment_end = NULL;
114
115
2.96k
        break;
116
2.96k
      }
117
5.59M
      else if( *segment_end == delimiter )
118
265k
      {
119
265k
        break;
120
265k
      }
121
5.32M
      segment_end++;
122
5.32M
    }
123
268k
    if( segment_end > string_end )
124
0
    {
125
0
      break;
126
0
    }
127
268k
    segment_index++;
128
129
268k
    if( segment_end == NULL )
130
2.96k
    {
131
2.96k
      break;
132
2.96k
    }
133
265k
    if( segment_end == segment_start )
134
7.05k
    {
135
7.05k
      segment_start++;
136
7.05k
    }
137
258k
    else if( segment_end != utf8_string )
138
258k
    {
139
258k
      segment_start = segment_end + 1;
140
258k
    }
141
265k
  }
142
265k
  while( segment_end != NULL );
143
144
0
  number_of_segments = segment_index;
145
146
2.96k
  if( libfvalue_split_utf8_string_initialize(
147
2.96k
       split_string,
148
2.96k
       utf8_string,
149
2.96k
       utf8_string_size,
150
2.96k
       number_of_segments,
151
2.96k
       error ) != 1 )
152
0
  {
153
0
    libcerror_error_set(
154
0
     error,
155
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
156
0
     LIBCERROR_RUNTIME_ERROR_INITIALIZE_FAILED,
157
0
     "%s: unable to initialize split string.",
158
0
     function );
159
160
0
    goto on_error;
161
0
  }
162
2.96k
  if( *split_string == NULL )
163
0
  {
164
0
    libcerror_error_set(
165
0
     error,
166
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
167
0
     LIBCERROR_RUNTIME_ERROR_VALUE_MISSING,
168
0
     "%s: missing split string.",
169
0
     function );
170
171
0
    goto on_error;
172
0
  }
173
  /* Do not bother splitting empty strings
174
   */
175
2.96k
  if( number_of_segments == 0 )
176
0
  {
177
0
    return( 1 );
178
0
  }
179
  /* Determine the segments
180
   * empty segments are stored as strings only containing the end of character
181
   */
182
2.96k
  if( libfvalue_split_utf8_string_get_string(
183
2.96k
       *split_string,
184
2.96k
       &segment_start,
185
2.96k
       &string_size,
186
2.96k
       error ) != 1 )
187
0
  {
188
0
    libcerror_error_set(
189
0
     error,
190
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
191
0
     LIBCERROR_RUNTIME_ERROR_GET_FAILED,
192
0
     "%s: unable to retrieve split UTF-8 string.",
193
0
     function );
194
195
0
    goto on_error;
196
0
  }
197
2.96k
  if( segment_start == NULL )
198
0
  {
199
0
    libcerror_error_set(
200
0
     error,
201
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
202
0
     LIBCERROR_RUNTIME_ERROR_VALUE_MISSING,
203
0
     "%s: missing segment start.",
204
0
     function );
205
206
0
    goto on_error;
207
0
  }
208
2.96k
  if( string_size < 1 )
209
0
  {
210
0
    libcerror_error_set(
211
0
     error,
212
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
213
0
     LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
214
0
     "%s: invalid string size value out of bounds.",
215
0
     function );
216
217
0
    goto on_error;
218
0
  }
219
2.96k
  string_end = &( segment_start[ string_size - 1 ] );
220
221
2.96k
  for( segment_index = 0;
222
268k
       segment_index < number_of_segments;
223
265k
       segment_index++ )
224
268k
  {
225
268k
    segment_end = segment_start;
226
227
5.59M
    while( segment_end <= string_end )
228
5.59M
    {
229
5.59M
      if( ( segment_end == string_end )
230
5.59M
       || ( *segment_end == 0 ) )
231
2.96k
      {
232
2.96k
        segment_end = NULL;
233
234
2.96k
        break;
235
2.96k
      }
236
5.59M
      else if( *segment_end == delimiter )
237
265k
      {
238
265k
        break;
239
265k
      }
240
5.32M
      segment_end++;
241
5.32M
    }
242
268k
    if( segment_end == NULL )
243
2.96k
    {
244
2.96k
      segment_length = (ssize_t) ( string_end - segment_start );
245
2.96k
    }
246
265k
    else
247
265k
    {
248
265k
      segment_length = (ssize_t) ( segment_end - segment_start );
249
265k
    }
250
268k
    if( segment_length >= 0 )
251
268k
    {
252
268k
      segment_start[ segment_length ] = 0;
253
254
268k
      if( libfvalue_split_utf8_string_set_segment_by_index(
255
268k
           *split_string,
256
268k
           segment_index,
257
268k
           segment_start,
258
268k
           segment_length + 1,
259
268k
           error ) != 1 )
260
0
      {
261
0
        libcerror_error_set(
262
0
         error,
263
0
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
264
0
         LIBCERROR_RUNTIME_ERROR_SET_FAILED,
265
0
         "%s: unable to set split UTF-8 string segment: %d.",
266
0
         function,
267
0
         segment_index );
268
269
0
        goto on_error;
270
0
      }
271
268k
    }
272
268k
    if( segment_end == NULL )
273
2.96k
    {
274
2.96k
      break;
275
2.96k
    }
276
265k
    if( segment_end == string_end )
277
0
    {
278
0
      segment_start++;
279
0
    }
280
265k
    if( segment_end != string_end )
281
265k
    {
282
265k
      segment_start = segment_end + 1;
283
265k
    }
284
265k
  }
285
2.96k
  return( 1 );
286
287
0
on_error:
288
0
  if( *split_string != NULL )
289
0
  {
290
0
    libfvalue_split_utf8_string_free(
291
0
     split_string,
292
0
     NULL );
293
0
  }
294
0
  return( -1 );
295
2.96k
}
296