/src/libewf/libfvalue/libfvalue_utf8_string.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * UTF-8 string value functions |
3 | | * |
4 | | * Copyright (C) 2010-2024, Joachim Metz <joachim.metz@gmail.com> |
5 | | * |
6 | | * Refer to AUTHORS for acknowledgements. |
7 | | * |
8 | | * This program is free software: you can redistribute it and/or modify |
9 | | * it under the terms of the GNU Lesser General Public License as published by |
10 | | * the Free Software Foundation, either version 3 of the License, or |
11 | | * (at your option) any later version. |
12 | | * |
13 | | * This program is distributed in the hope that it will be useful, |
14 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | | * GNU General Public License for more details. |
17 | | * |
18 | | * You should have received a copy of the GNU Lesser General Public License |
19 | | * along with this program. If not, see <https://www.gnu.org/licenses/>. |
20 | | */ |
21 | | |
22 | | #include <common.h> |
23 | | #include <types.h> |
24 | | |
25 | | #include "libfvalue_libcerror.h" |
26 | | #include "libfvalue_split_utf8_string.h" |
27 | | #include "libfvalue_types.h" |
28 | | |
29 | | /* Splits an UTF-8 string |
30 | | * Returns 1 if successful or -1 on error |
31 | | */ |
32 | | int libfvalue_utf8_string_split( |
33 | | const uint8_t *utf8_string, |
34 | | size_t utf8_string_size, |
35 | | uint8_t delimiter, |
36 | | libfvalue_split_utf8_string_t **split_string, |
37 | | libcerror_error_t **error ) |
38 | 3.36k | { |
39 | 3.36k | uint8_t *segment_end = NULL; |
40 | 3.36k | uint8_t *segment_start = NULL; |
41 | 3.36k | const uint8_t *string_end = NULL; |
42 | 3.36k | static char *function = "libfvalue_utf8_string_split"; |
43 | 3.36k | size_t string_size = 0; |
44 | 3.36k | ssize_t segment_length = 0; |
45 | 3.36k | int number_of_segments = 0; |
46 | 3.36k | int segment_index = 0; |
47 | | |
48 | 3.36k | if( utf8_string == NULL ) |
49 | 0 | { |
50 | 0 | libcerror_error_set( |
51 | 0 | error, |
52 | 0 | LIBCERROR_ERROR_DOMAIN_ARGUMENTS, |
53 | 0 | LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE, |
54 | 0 | "%s: invalid UTF-8 string.", |
55 | 0 | function ); |
56 | |
|
57 | 0 | return( -1 ); |
58 | 0 | } |
59 | 3.36k | if( utf8_string_size > (size_t) SSIZE_MAX ) |
60 | 0 | { |
61 | 0 | libcerror_error_set( |
62 | 0 | error, |
63 | 0 | LIBCERROR_ERROR_DOMAIN_ARGUMENTS, |
64 | 0 | LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM, |
65 | 0 | "%s: invalid UTF-8 string size value exceeds maximum.", |
66 | 0 | function ); |
67 | |
|
68 | 0 | return( -1 ); |
69 | 0 | } |
70 | 3.36k | if( split_string == NULL ) |
71 | 0 | { |
72 | 0 | libcerror_error_set( |
73 | 0 | error, |
74 | 0 | LIBCERROR_ERROR_DOMAIN_ARGUMENTS, |
75 | 0 | LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE, |
76 | 0 | "%s: invalid split string.", |
77 | 0 | function ); |
78 | |
|
79 | 0 | return( -1 ); |
80 | 0 | } |
81 | 3.36k | if( *split_string != NULL ) |
82 | 0 | { |
83 | 0 | libcerror_error_set( |
84 | 0 | error, |
85 | 0 | LIBCERROR_ERROR_DOMAIN_RUNTIME, |
86 | 0 | LIBCERROR_RUNTIME_ERROR_VALUE_ALREADY_SET, |
87 | 0 | "%s: invalid split string already set.", |
88 | 0 | function ); |
89 | |
|
90 | 0 | return( -1 ); |
91 | 0 | } |
92 | | /* An empty string has no segments |
93 | | */ |
94 | 3.36k | if( ( utf8_string_size == 0 ) |
95 | 3.36k | || ( utf8_string[ 0 ] == 0 ) ) |
96 | 8 | { |
97 | 8 | return( 1 ); |
98 | 8 | } |
99 | 3.35k | if( utf8_string[ utf8_string_size - 1 ] == 0 ) |
100 | 3.35k | { |
101 | 3.35k | utf8_string_size--; |
102 | 3.35k | } |
103 | | /* Determine the number of segments |
104 | | */ |
105 | 3.35k | segment_start = (uint8_t *) utf8_string; |
106 | 3.35k | string_end = utf8_string + utf8_string_size; |
107 | | |
108 | 3.35k | do |
109 | 271k | { |
110 | 271k | segment_end = segment_start; |
111 | | |
112 | 5.61M | while( segment_end <= string_end ) |
113 | 5.61M | { |
114 | 5.61M | if( ( segment_end == string_end ) |
115 | 5.61M | || ( *segment_end == 0 ) ) |
116 | 3.35k | { |
117 | 3.35k | segment_end = NULL; |
118 | | |
119 | 3.35k | break; |
120 | 3.35k | } |
121 | 5.60M | else if( *segment_end == delimiter ) |
122 | 268k | { |
123 | 268k | break; |
124 | 268k | } |
125 | 5.34M | segment_end++; |
126 | 5.34M | } |
127 | 271k | if( segment_end > string_end ) |
128 | 0 | { |
129 | 0 | break; |
130 | 0 | } |
131 | 271k | segment_index++; |
132 | | |
133 | 271k | if( segment_end == NULL ) |
134 | 3.35k | { |
135 | 3.35k | break; |
136 | 3.35k | } |
137 | 268k | if( segment_end == segment_start ) |
138 | 7.60k | { |
139 | 7.60k | segment_start++; |
140 | 7.60k | } |
141 | 260k | else if( segment_end != utf8_string ) |
142 | 260k | { |
143 | 260k | segment_start = segment_end + 1; |
144 | 260k | } |
145 | 268k | } |
146 | 268k | while( segment_end != NULL ); |
147 | | |
148 | 0 | number_of_segments = segment_index; |
149 | | |
150 | 3.35k | if( libfvalue_split_utf8_string_initialize( |
151 | 3.35k | split_string, |
152 | 3.35k | utf8_string, |
153 | 3.35k | utf8_string_size + 1, |
154 | 3.35k | number_of_segments, |
155 | 3.35k | error ) != 1 ) |
156 | 0 | { |
157 | 0 | libcerror_error_set( |
158 | 0 | error, |
159 | 0 | LIBCERROR_ERROR_DOMAIN_RUNTIME, |
160 | 0 | LIBCERROR_RUNTIME_ERROR_INITIALIZE_FAILED, |
161 | 0 | "%s: unable to initialize split string.", |
162 | 0 | function ); |
163 | |
|
164 | 0 | goto on_error; |
165 | 0 | } |
166 | 3.35k | if( *split_string == NULL ) |
167 | 0 | { |
168 | 0 | libcerror_error_set( |
169 | 0 | error, |
170 | 0 | LIBCERROR_ERROR_DOMAIN_RUNTIME, |
171 | 0 | LIBCERROR_RUNTIME_ERROR_VALUE_MISSING, |
172 | 0 | "%s: missing split string.", |
173 | 0 | function ); |
174 | |
|
175 | 0 | goto on_error; |
176 | 0 | } |
177 | | /* Do not bother splitting empty strings |
178 | | */ |
179 | 3.35k | if( number_of_segments == 0 ) |
180 | 0 | { |
181 | 0 | return( 1 ); |
182 | 0 | } |
183 | | /* Determine the segments |
184 | | * empty segments are stored as strings only containing the end of character |
185 | | */ |
186 | 3.35k | if( libfvalue_split_utf8_string_get_string( |
187 | 3.35k | *split_string, |
188 | 3.35k | &segment_start, |
189 | 3.35k | &string_size, |
190 | 3.35k | error ) != 1 ) |
191 | 0 | { |
192 | 0 | libcerror_error_set( |
193 | 0 | error, |
194 | 0 | LIBCERROR_ERROR_DOMAIN_RUNTIME, |
195 | 0 | LIBCERROR_RUNTIME_ERROR_GET_FAILED, |
196 | 0 | "%s: unable to retrieve split UTF-8 string.", |
197 | 0 | function ); |
198 | |
|
199 | 0 | goto on_error; |
200 | 0 | } |
201 | 3.35k | if( segment_start == NULL ) |
202 | 0 | { |
203 | 0 | libcerror_error_set( |
204 | 0 | error, |
205 | 0 | LIBCERROR_ERROR_DOMAIN_RUNTIME, |
206 | 0 | LIBCERROR_RUNTIME_ERROR_VALUE_MISSING, |
207 | 0 | "%s: missing segment start.", |
208 | 0 | function ); |
209 | |
|
210 | 0 | goto on_error; |
211 | 0 | } |
212 | 3.35k | if( string_size < 1 ) |
213 | 0 | { |
214 | 0 | libcerror_error_set( |
215 | 0 | error, |
216 | 0 | LIBCERROR_ERROR_DOMAIN_RUNTIME, |
217 | 0 | LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS, |
218 | 0 | "%s: invalid string size value out of bounds.", |
219 | 0 | function ); |
220 | |
|
221 | 0 | goto on_error; |
222 | 0 | } |
223 | 3.35k | string_end = segment_start + utf8_string_size; |
224 | | |
225 | 3.35k | for( segment_index = 0; |
226 | 271k | segment_index < number_of_segments; |
227 | 268k | segment_index++ ) |
228 | 271k | { |
229 | 271k | segment_end = segment_start; |
230 | | |
231 | 5.61M | while( segment_end <= string_end ) |
232 | 5.61M | { |
233 | 5.61M | if( ( segment_end == string_end ) |
234 | 5.61M | || ( *segment_end == 0 ) ) |
235 | 3.35k | { |
236 | 3.35k | segment_end = NULL; |
237 | | |
238 | 3.35k | break; |
239 | 3.35k | } |
240 | 5.60M | else if( *segment_end == delimiter ) |
241 | 268k | { |
242 | 268k | break; |
243 | 268k | } |
244 | 5.34M | segment_end++; |
245 | 5.34M | } |
246 | 271k | if( segment_end == NULL ) |
247 | 3.35k | { |
248 | 3.35k | segment_length = (ssize_t) ( string_end - segment_start ); |
249 | 3.35k | } |
250 | 268k | else |
251 | 268k | { |
252 | 268k | segment_length = (ssize_t) ( segment_end - segment_start ); |
253 | 268k | } |
254 | 271k | if( segment_length >= 0 ) |
255 | 271k | { |
256 | 271k | segment_start[ segment_length ] = 0; |
257 | | |
258 | 271k | if( libfvalue_split_utf8_string_set_segment_by_index( |
259 | 271k | *split_string, |
260 | 271k | segment_index, |
261 | 271k | segment_start, |
262 | 271k | segment_length + 1, |
263 | 271k | error ) != 1 ) |
264 | 0 | { |
265 | 0 | libcerror_error_set( |
266 | 0 | error, |
267 | 0 | LIBCERROR_ERROR_DOMAIN_RUNTIME, |
268 | 0 | LIBCERROR_RUNTIME_ERROR_SET_FAILED, |
269 | 0 | "%s: unable to set split UTF-8 string segment: %d.", |
270 | 0 | function, |
271 | 0 | segment_index ); |
272 | |
|
273 | 0 | goto on_error; |
274 | 0 | } |
275 | 271k | } |
276 | 271k | if( segment_end == NULL ) |
277 | 3.35k | { |
278 | 3.35k | break; |
279 | 3.35k | } |
280 | 268k | if( segment_end == string_end ) |
281 | 0 | { |
282 | 0 | segment_start++; |
283 | 0 | } |
284 | 268k | if( segment_end != string_end ) |
285 | 268k | { |
286 | 268k | segment_start = segment_end + 1; |
287 | 268k | } |
288 | 268k | } |
289 | 3.35k | return( 1 ); |
290 | | |
291 | 0 | on_error: |
292 | 0 | if( *split_string != NULL ) |
293 | 0 | { |
294 | 0 | libfvalue_split_utf8_string_free( |
295 | 0 | split_string, |
296 | 0 | NULL ); |
297 | 0 | } |
298 | 0 | return( -1 ); |
299 | 3.35k | } |
300 | | |