/src/libewf/libfvalue/libfvalue_utf8_string.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * UTF-8 string value functions |
3 | | * |
4 | | * Copyright (C) 2010-2022, Joachim Metz <joachim.metz@gmail.com> |
5 | | * |
6 | | * Refer to AUTHORS for acknowledgements. |
7 | | * |
8 | | * This program is free software: you can redistribute it and/or modify |
9 | | * it under the terms of the GNU Lesser General Public License as published by |
10 | | * the Free Software Foundation, either version 3 of the License, or |
11 | | * (at your option) any later version. |
12 | | * |
13 | | * This program is distributed in the hope that it will be useful, |
14 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | | * GNU General Public License for more details. |
17 | | * |
18 | | * You should have received a copy of the GNU Lesser General Public License |
19 | | * along with this program. If not, see <https://www.gnu.org/licenses/>. |
20 | | */ |
21 | | |
#include <common.h>
#include <types.h>

#include <limits.h>

#include "libfvalue_libcerror.h"
#include "libfvalue_split_utf8_string.h"
#include "libfvalue_types.h"
28 | | |
29 | | /* Splits an UTF-8 string |
30 | | * Returns 1 if successful or -1 on error |
31 | | */ |
32 | | int libfvalue_utf8_string_split( |
33 | | const uint8_t *utf8_string, |
34 | | size_t utf8_string_size, |
35 | | uint8_t delimiter, |
36 | | libfvalue_split_utf8_string_t **split_string, |
37 | | libcerror_error_t **error ) |
38 | 2.97k | { |
39 | 2.97k | uint8_t *segment_start = NULL; |
40 | 2.97k | uint8_t *segment_end = NULL; |
41 | 2.97k | uint8_t *string_end = NULL; |
42 | 2.97k | static char *function = "libfvalue_utf8_string_split"; |
43 | 2.97k | size_t string_size = 0; |
44 | 2.97k | ssize_t segment_length = 0; |
45 | 2.97k | int number_of_segments = 0; |
46 | 2.97k | int segment_index = 0; |
47 | | |
48 | 2.97k | if( utf8_string == NULL ) |
49 | 0 | { |
50 | 0 | libcerror_error_set( |
51 | 0 | error, |
52 | 0 | LIBCERROR_ERROR_DOMAIN_ARGUMENTS, |
53 | 0 | LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE, |
54 | 0 | "%s: invalid UTF-8 string.", |
55 | 0 | function ); |
56 | |
|
57 | 0 | return( -1 ); |
58 | 0 | } |
59 | 2.97k | if( utf8_string_size > (size_t) SSIZE_MAX ) |
60 | 0 | { |
61 | 0 | libcerror_error_set( |
62 | 0 | error, |
63 | 0 | LIBCERROR_ERROR_DOMAIN_ARGUMENTS, |
64 | 0 | LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM, |
65 | 0 | "%s: invalid UTF-8 string size value exceeds maximum.", |
66 | 0 | function ); |
67 | |
|
68 | 0 | return( -1 ); |
69 | 0 | } |
70 | 2.97k | if( split_string == NULL ) |
71 | 0 | { |
72 | 0 | libcerror_error_set( |
73 | 0 | error, |
74 | 0 | LIBCERROR_ERROR_DOMAIN_ARGUMENTS, |
75 | 0 | LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE, |
76 | 0 | "%s: invalid split string.", |
77 | 0 | function ); |
78 | |
|
79 | 0 | return( -1 ); |
80 | 0 | } |
81 | 2.97k | if( *split_string != NULL ) |
82 | 0 | { |
83 | 0 | libcerror_error_set( |
84 | 0 | error, |
85 | 0 | LIBCERROR_ERROR_DOMAIN_RUNTIME, |
86 | 0 | LIBCERROR_RUNTIME_ERROR_VALUE_ALREADY_SET, |
87 | 0 | "%s: invalid split string already set.", |
88 | 0 | function ); |
89 | |
|
90 | 0 | return( -1 ); |
91 | 0 | } |
92 | | /* An empty string has no segments |
93 | | */ |
94 | 2.97k | if( ( utf8_string_size == 0 ) |
95 | 2.97k | || ( utf8_string[ 0 ] == 0 ) ) |
96 | 8 | { |
97 | 8 | return( 1 ); |
98 | 8 | } |
99 | | /* Determine the number of segments |
100 | | */ |
101 | 2.96k | segment_start = (uint8_t *) utf8_string; |
102 | 2.96k | string_end = (uint8_t *) &( utf8_string[ utf8_string_size - 1 ] ); |
103 | | |
104 | 2.96k | do |
105 | 268k | { |
106 | 268k | segment_end = segment_start; |
107 | | |
108 | 5.59M | while( segment_end <= string_end ) |
109 | 5.59M | { |
110 | 5.59M | if( ( segment_end == string_end ) |
111 | 5.59M | || ( *segment_end == 0 ) ) |
112 | 2.96k | { |
113 | 2.96k | segment_end = NULL; |
114 | | |
115 | 2.96k | break; |
116 | 2.96k | } |
117 | 5.59M | else if( *segment_end == delimiter ) |
118 | 265k | { |
119 | 265k | break; |
120 | 265k | } |
121 | 5.32M | segment_end++; |
122 | 5.32M | } |
123 | 268k | if( segment_end > string_end ) |
124 | 0 | { |
125 | 0 | break; |
126 | 0 | } |
127 | 268k | segment_index++; |
128 | | |
129 | 268k | if( segment_end == NULL ) |
130 | 2.96k | { |
131 | 2.96k | break; |
132 | 2.96k | } |
133 | 265k | if( segment_end == segment_start ) |
134 | 7.05k | { |
135 | 7.05k | segment_start++; |
136 | 7.05k | } |
137 | 258k | else if( segment_end != utf8_string ) |
138 | 258k | { |
139 | 258k | segment_start = segment_end + 1; |
140 | 258k | } |
141 | 265k | } |
142 | 265k | while( segment_end != NULL ); |
143 | | |
144 | 0 | number_of_segments = segment_index; |
145 | | |
146 | 2.96k | if( libfvalue_split_utf8_string_initialize( |
147 | 2.96k | split_string, |
148 | 2.96k | utf8_string, |
149 | 2.96k | utf8_string_size, |
150 | 2.96k | number_of_segments, |
151 | 2.96k | error ) != 1 ) |
152 | 0 | { |
153 | 0 | libcerror_error_set( |
154 | 0 | error, |
155 | 0 | LIBCERROR_ERROR_DOMAIN_RUNTIME, |
156 | 0 | LIBCERROR_RUNTIME_ERROR_INITIALIZE_FAILED, |
157 | 0 | "%s: unable to initialize split string.", |
158 | 0 | function ); |
159 | |
|
160 | 0 | goto on_error; |
161 | 0 | } |
162 | 2.96k | if( *split_string == NULL ) |
163 | 0 | { |
164 | 0 | libcerror_error_set( |
165 | 0 | error, |
166 | 0 | LIBCERROR_ERROR_DOMAIN_RUNTIME, |
167 | 0 | LIBCERROR_RUNTIME_ERROR_VALUE_MISSING, |
168 | 0 | "%s: missing split string.", |
169 | 0 | function ); |
170 | |
|
171 | 0 | goto on_error; |
172 | 0 | } |
173 | | /* Do not bother splitting empty strings |
174 | | */ |
175 | 2.96k | if( number_of_segments == 0 ) |
176 | 0 | { |
177 | 0 | return( 1 ); |
178 | 0 | } |
179 | | /* Determine the segments |
180 | | * empty segments are stored as strings only containing the end of character |
181 | | */ |
182 | 2.96k | if( libfvalue_split_utf8_string_get_string( |
183 | 2.96k | *split_string, |
184 | 2.96k | &segment_start, |
185 | 2.96k | &string_size, |
186 | 2.96k | error ) != 1 ) |
187 | 0 | { |
188 | 0 | libcerror_error_set( |
189 | 0 | error, |
190 | 0 | LIBCERROR_ERROR_DOMAIN_RUNTIME, |
191 | 0 | LIBCERROR_RUNTIME_ERROR_GET_FAILED, |
192 | 0 | "%s: unable to retrieve split UTF-8 string.", |
193 | 0 | function ); |
194 | |
|
195 | 0 | goto on_error; |
196 | 0 | } |
197 | 2.96k | if( segment_start == NULL ) |
198 | 0 | { |
199 | 0 | libcerror_error_set( |
200 | 0 | error, |
201 | 0 | LIBCERROR_ERROR_DOMAIN_RUNTIME, |
202 | 0 | LIBCERROR_RUNTIME_ERROR_VALUE_MISSING, |
203 | 0 | "%s: missing segment start.", |
204 | 0 | function ); |
205 | |
|
206 | 0 | goto on_error; |
207 | 0 | } |
208 | 2.96k | if( string_size < 1 ) |
209 | 0 | { |
210 | 0 | libcerror_error_set( |
211 | 0 | error, |
212 | 0 | LIBCERROR_ERROR_DOMAIN_RUNTIME, |
213 | 0 | LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS, |
214 | 0 | "%s: invalid string size value out of bounds.", |
215 | 0 | function ); |
216 | |
|
217 | 0 | goto on_error; |
218 | 0 | } |
219 | 2.96k | string_end = &( segment_start[ string_size - 1 ] ); |
220 | | |
221 | 2.96k | for( segment_index = 0; |
222 | 268k | segment_index < number_of_segments; |
223 | 265k | segment_index++ ) |
224 | 268k | { |
225 | 268k | segment_end = segment_start; |
226 | | |
227 | 5.59M | while( segment_end <= string_end ) |
228 | 5.59M | { |
229 | 5.59M | if( ( segment_end == string_end ) |
230 | 5.59M | || ( *segment_end == 0 ) ) |
231 | 2.96k | { |
232 | 2.96k | segment_end = NULL; |
233 | | |
234 | 2.96k | break; |
235 | 2.96k | } |
236 | 5.59M | else if( *segment_end == delimiter ) |
237 | 265k | { |
238 | 265k | break; |
239 | 265k | } |
240 | 5.32M | segment_end++; |
241 | 5.32M | } |
242 | 268k | if( segment_end == NULL ) |
243 | 2.96k | { |
244 | 2.96k | segment_length = (ssize_t) ( string_end - segment_start ); |
245 | 2.96k | } |
246 | 265k | else |
247 | 265k | { |
248 | 265k | segment_length = (ssize_t) ( segment_end - segment_start ); |
249 | 265k | } |
250 | 268k | if( segment_length >= 0 ) |
251 | 268k | { |
252 | 268k | segment_start[ segment_length ] = 0; |
253 | | |
254 | 268k | if( libfvalue_split_utf8_string_set_segment_by_index( |
255 | 268k | *split_string, |
256 | 268k | segment_index, |
257 | 268k | segment_start, |
258 | 268k | segment_length + 1, |
259 | 268k | error ) != 1 ) |
260 | 0 | { |
261 | 0 | libcerror_error_set( |
262 | 0 | error, |
263 | 0 | LIBCERROR_ERROR_DOMAIN_RUNTIME, |
264 | 0 | LIBCERROR_RUNTIME_ERROR_SET_FAILED, |
265 | 0 | "%s: unable to set split UTF-8 string segment: %d.", |
266 | 0 | function, |
267 | 0 | segment_index ); |
268 | |
|
269 | 0 | goto on_error; |
270 | 0 | } |
271 | 268k | } |
272 | 268k | if( segment_end == NULL ) |
273 | 2.96k | { |
274 | 2.96k | break; |
275 | 2.96k | } |
276 | 265k | if( segment_end == string_end ) |
277 | 0 | { |
278 | 0 | segment_start++; |
279 | 0 | } |
280 | 265k | if( segment_end != string_end ) |
281 | 265k | { |
282 | 265k | segment_start = segment_end + 1; |
283 | 265k | } |
284 | 265k | } |
285 | 2.96k | return( 1 ); |
286 | | |
287 | 0 | on_error: |
288 | 0 | if( *split_string != NULL ) |
289 | 0 | { |
290 | 0 | libfvalue_split_utf8_string_free( |
291 | 0 | split_string, |
292 | 0 | NULL ); |
293 | 0 | } |
294 | 0 | return( -1 ); |
295 | 2.96k | } |
296 | | |