/src/libewf/libfvalue/libfvalue_utf8_string.c

Source (jump to first uncovered line)
/*
 * UTF-8 string value functions
 *
 * Copyright (C) 2010-2024, Joachim Metz <joachim.metz@gmail.com>
 *
 * Refer to AUTHORS for acknowledgements.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

#include <common.h>
#include <types.h>

#include "libfvalue_libcerror.h"
#include "libfvalue_split_utf8_string.h"
#include "libfvalue_types.h"

/* Scans a segment for the next delimiter
 * The scan starts at segment_start and stops at string_end or at the first
 * end-of-string character, whichever comes first
 * Returns a pointer to the delimiter or NULL if the segment contains none
 */
static const uint8_t *libfvalue_utf8_string_split_find_delimiter(
                       const uint8_t *segment_start,
                       const uint8_t *string_end,
                       uint8_t delimiter )
{
  const uint8_t *position = segment_start;

  /* The end-of-string test comes first so that a delimiter value of 0
   * is never reported as a delimiter
   */
  while( ( position < string_end )
      && ( *position != 0 ) )
  {
    if( *position == delimiter )
    {
      return( position );
    }
    position++;
  }
  return( NULL );
}

/* Splits an UTF-8 string into segments separated by delimiter
 *
 * utf8_string_size is the size in bytes, a single trailing end-of-string
 * character is not considered part of the last segment
 *
 * An empty string (size 0 or starting with an end-of-string character) has
 * no segments: 1 is returned and *split_string is left NULL
 *
 * Otherwise *split_string is created with libfvalue_split_utf8_string_initialize
 * and every segment, including empty ones, is registered with a size that
 * includes its terminating end-of-string character. On error a split string
 * created by this function is passed to libfvalue_split_utf8_string_free
 *
 * Returns 1 if successful or -1 on error
 */
int libfvalue_utf8_string_split(
     const uint8_t *utf8_string,
     size_t utf8_string_size,
     uint8_t delimiter,
     libfvalue_split_utf8_string_t **split_string,
     libcerror_error_t **error )
{
  const uint8_t *delimiter_position = NULL;
  const uint8_t *scan_position      = NULL;
  const uint8_t *string_end         = NULL;
  uint8_t *segment_start            = NULL;
  static char *function             = "libfvalue_utf8_string_split";
  size_t string_size                = 0;
  ssize_t segment_length            = 0;
  int number_of_segments            = 0;
  int segment_index                 = 0;

  if( utf8_string == NULL )
  {
    libcerror_error_set(
     error,
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
     "%s: invalid UTF-8 string.",
     function );

    return( -1 );
  }
  if( utf8_string_size > (size_t) SSIZE_MAX )
  {
    libcerror_error_set(
     error,
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
     "%s: invalid UTF-8 string size value exceeds maximum.",
     function );

    return( -1 );
  }
  if( split_string == NULL )
  {
    libcerror_error_set(
     error,
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
     "%s: invalid split string.",
     function );

    return( -1 );
  }
  if( *split_string != NULL )
  {
    libcerror_error_set(
     error,
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
     LIBCERROR_RUNTIME_ERROR_VALUE_ALREADY_SET,
     "%s: invalid split string already set.",
     function );

    return( -1 );
  }
  /* An empty string has no segments, no split string is created for it
   */
  if( ( utf8_string_size == 0 )
   || ( utf8_string[ 0 ] == 0 ) )
  {
    return( 1 );
  }
  /* Ignore a single trailing end-of-string character
   * The size remains at least 1 since the first byte is known to be non-zero
   */
  if( utf8_string[ utf8_string_size - 1 ] == 0 )
  {
    utf8_string_size--;
  }
  /* Determine the number of segments: one per delimiter found before the end
   * of the string or the first end-of-string character, plus the last segment
   * Hence there always is at least 1 segment at this point
   *
   * NOTE(review): number_of_segments is an int, a string containing more than
   * INT_MAX delimiters would overflow it - confirm callers cannot pass one
   */
  scan_position = utf8_string;
  string_end    = utf8_string + utf8_string_size;

  do
  {
    delimiter_position = libfvalue_utf8_string_split_find_delimiter(
                          scan_position,
                          string_end,
                          delimiter );

    number_of_segments++;

    if( delimiter_position != NULL )
    {
      /* The next segment starts directly after the delimiter
       */
      scan_position = delimiter_position + 1;
    }
  }
  while( delimiter_position != NULL );

  if( libfvalue_split_utf8_string_initialize(
       split_string,
       utf8_string,
       utf8_string_size + 1,
       number_of_segments,
       error ) != 1 )
  {
    libcerror_error_set(
     error,
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
     LIBCERROR_RUNTIME_ERROR_INITIALIZE_FAILED,
     "%s: unable to initialize split string.",
     function );

    goto on_error;
  }
  if( *split_string == NULL )
  {
    libcerror_error_set(
     error,
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
     LIBCERROR_RUNTIME_ERROR_VALUE_MISSING,
     "%s: missing split string.",
     function );

    goto on_error;
  }
  /* Determine the segments
   * empty segments are stored as strings only containing the end of character
   *
   * The segments are marked in the string buffer retrieved from the split
   * string (presumably its own copy of utf8_string - verify against
   * libfvalue_split_utf8_string_initialize), the input string is not modified
   */
  if( libfvalue_split_utf8_string_get_string(
       *split_string,
       &segment_start,
       &string_size,
       error ) != 1 )
  {
    libcerror_error_set(
     error,
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
     LIBCERROR_RUNTIME_ERROR_GET_FAILED,
     "%s: unable to retrieve split UTF-8 string.",
     function );

    goto on_error;
  }
  if( segment_start == NULL )
  {
    libcerror_error_set(
     error,
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
     LIBCERROR_RUNTIME_ERROR_VALUE_MISSING,
     "%s: missing segment start.",
     function );

    goto on_error;
  }
  if( string_size < 1 )
  {
    libcerror_error_set(
     error,
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
     LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
     "%s: invalid string size value out of bounds.",
     function );

    goto on_error;
  }
  string_end = segment_start + utf8_string_size;

  for( segment_index = 0;
       segment_index < number_of_segments;
       segment_index++ )
  {
    delimiter_position = libfvalue_utf8_string_split_find_delimiter(
                          segment_start,
                          string_end,
                          delimiter );

    if( delimiter_position == NULL )
    {
      /* The last segment always runs up to the end of the string, also when
       * the scan stopped at an embedded end-of-string character
       */
      segment_length = (ssize_t) ( string_end - segment_start );
    }
    else
    {
      segment_length = (ssize_t) ( delimiter_position - segment_start );
    }
    /* Terminate the segment in place, this overwrites the delimiter or the
     * byte at string_end
     */
    segment_start[ segment_length ] = 0;

    if( libfvalue_split_utf8_string_set_segment_by_index(
         *split_string,
         segment_index,
         segment_start,
         segment_length + 1,
         error ) != 1 )
    {
      libcerror_error_set(
       error,
       LIBCERROR_ERROR_DOMAIN_RUNTIME,
       LIBCERROR_RUNTIME_ERROR_SET_FAILED,
       "%s: unable to set split UTF-8 string segment: %d.",
       function,
       segment_index );

      goto on_error;
    }
    if( delimiter_position == NULL )
    {
      break;
    }
    /* Continue directly after the overwritten delimiter
     */
    segment_start += segment_length + 1;
  }
  return( 1 );

on_error:
  if( *split_string != NULL )
  {
    libfvalue_split_utf8_string_free(
     split_string,
     NULL );
  }
  return( -1 );
}


Coverage Report

Created: 2024-02-25 07:20

Line	Count	Source (jump to first uncovered line)
1		/*
2		* UTF-8 string value functions
3		*
4		* Copyright (C) 2010-2024, Joachim Metz <joachim.metz@gmail.com>
5		*
6		* Refer to AUTHORS for acknowledgements.
7		*
8		* This program is free software: you can redistribute it and/or modify
9		* it under the terms of the GNU Lesser General Public License as published by
10		* the Free Software Foundation, either version 3 of the License, or
11		* (at your option) any later version.
12		*
13		* This program is distributed in the hope that it will be useful,
14		* but WITHOUT ANY WARRANTY; without even the implied warranty of
15		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16		* GNU General Public License for more details.
17		*
18		* You should have received a copy of the GNU Lesser General Public License
19		* along with this program. If not, see <https://www.gnu.org/licenses/>.
20		*/
21
22		#include <common.h>
23		#include <types.h>
24
25		#include "libfvalue_libcerror.h"
26		#include "libfvalue_split_utf8_string.h"
27		#include "libfvalue_types.h"
28
29		/* Splits an UTF-8 string
30		* Returns 1 if successful or -1 on error
31		*/
32		int libfvalue_utf8_string_split(
33		const uint8_t *utf8_string,
34		size_t utf8_string_size,
35		uint8_t delimiter,
36		libfvalue_split_utf8_string_t **split_string,
37		libcerror_error_t **error )
38	3.36k	{
39	3.36k	uint8_t *segment_end = NULL;
40	3.36k	uint8_t *segment_start = NULL;
41	3.36k	const uint8_t *string_end = NULL;
42	3.36k	static char *function = "libfvalue_utf8_string_split";
43	3.36k	size_t string_size = 0;
44	3.36k	ssize_t segment_length = 0;
45	3.36k	int number_of_segments = 0;
46	3.36k	int segment_index = 0;
47
48	3.36k	if( utf8_string == NULL )
49	0	{
50	0	libcerror_error_set(
51	0	error,
52	0	LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
53	0	LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
54	0	"%s: invalid UTF-8 string.",
55	0	function );
56
57	0	return( -1 );
58	0	}
59	3.36k	if( utf8_string_size > (size_t) SSIZE_MAX )
60	0	{
61	0	libcerror_error_set(
62	0	error,
63	0	LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
64	0	LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
65	0	"%s: invalid UTF-8 string size value exceeds maximum.",
66	0	function );
67
68	0	return( -1 );
69	0	}
70	3.36k	if( split_string == NULL )
71	0	{
72	0	libcerror_error_set(
73	0	error,
74	0	LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
75	0	LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
76	0	"%s: invalid split string.",
77	0	function );
78
79	0	return( -1 );
80	0	}
81	3.36k	if( *split_string != NULL )
82	0	{
83	0	libcerror_error_set(
84	0	error,
85	0	LIBCERROR_ERROR_DOMAIN_RUNTIME,
86	0	LIBCERROR_RUNTIME_ERROR_VALUE_ALREADY_SET,
87	0	"%s: invalid split string already set.",
88	0	function );
89
90	0	return( -1 );
91	0	}
92		/* An empty string has no segments
93		*/
94	3.36k	if( ( utf8_string_size == 0 )
95	3.36k	|| ( utf8_string[ 0 ] == 0 ) )
96	8	{
97	8	return( 1 );
98	8	}
99	3.35k	if( utf8_string[ utf8_string_size - 1 ] == 0 )
100	3.35k	{
101	3.35k	utf8_string_size--;
102	3.35k	}
103		/* Determine the number of segments
104		*/
105	3.35k	segment_start = (uint8_t *) utf8_string;
106	3.35k	string_end = utf8_string + utf8_string_size;
107
108	3.35k	do
109	271k	{
110	271k	segment_end = segment_start;
111
112	5.61M	while( segment_end <= string_end )
113	5.61M	{
114	5.61M	if( ( segment_end == string_end )
115	5.61M	|| ( *segment_end == 0 ) )
116	3.35k	{
117	3.35k	segment_end = NULL;
118
119	3.35k	break;
120	3.35k	}
121	5.60M	else if( *segment_end == delimiter )
122	268k	{
123	268k	break;
124	268k	}
125	5.34M	segment_end++;
126	5.34M	}
127	271k	if( segment_end > string_end )
128	0	{
129	0	break;
130	0	}
131	271k	segment_index++;
132
133	271k	if( segment_end == NULL )
134	3.35k	{
135	3.35k	break;
136	3.35k	}
137	268k	if( segment_end == segment_start )
138	7.60k	{
139	7.60k	segment_start++;
140	7.60k	}
141	260k	else if( segment_end != utf8_string )
142	260k	{
143	260k	segment_start = segment_end + 1;
144	260k	}
145	268k	}
146	268k	while( segment_end != NULL );
147
148	0	number_of_segments = segment_index;
149
150	3.35k	if( libfvalue_split_utf8_string_initialize(
151	3.35k	split_string,
152	3.35k	utf8_string,
153	3.35k	utf8_string_size + 1,
154	3.35k	number_of_segments,
155	3.35k	error ) != 1 )
156	0	{
157	0	libcerror_error_set(
158	0	error,
159	0	LIBCERROR_ERROR_DOMAIN_RUNTIME,
160	0	LIBCERROR_RUNTIME_ERROR_INITIALIZE_FAILED,
161	0	"%s: unable to initialize split string.",
162	0	function );
163
164	0	goto on_error;
165	0	}
166	3.35k	if( *split_string == NULL )
167	0	{
168	0	libcerror_error_set(
169	0	error,
170	0	LIBCERROR_ERROR_DOMAIN_RUNTIME,
171	0	LIBCERROR_RUNTIME_ERROR_VALUE_MISSING,
172	0	"%s: missing split string.",
173	0	function );
174
175	0	goto on_error;
176	0	}
177		/* Do not bother splitting empty strings
178		*/
179	3.35k	if( number_of_segments == 0 )
180	0	{
181	0	return( 1 );
182	0	}
183		/* Determine the segments
184		* empty segments are stored as strings only containing the end of character
185		*/
186	3.35k	if( libfvalue_split_utf8_string_get_string(
187	3.35k	*split_string,
188	3.35k	&segment_start,
189	3.35k	&string_size,
190	3.35k	error ) != 1 )
191	0	{
192	0	libcerror_error_set(
193	0	error,
194	0	LIBCERROR_ERROR_DOMAIN_RUNTIME,
195	0	LIBCERROR_RUNTIME_ERROR_GET_FAILED,
196	0	"%s: unable to retrieve split UTF-8 string.",
197	0	function );
198
199	0	goto on_error;
200	0	}
201	3.35k	if( segment_start == NULL )
202	0	{
203	0	libcerror_error_set(
204	0	error,
205	0	LIBCERROR_ERROR_DOMAIN_RUNTIME,
206	0	LIBCERROR_RUNTIME_ERROR_VALUE_MISSING,
207	0	"%s: missing segment start.",
208	0	function );
209
210	0	goto on_error;
211	0	}
212	3.35k	if( string_size < 1 )
213	0	{
214	0	libcerror_error_set(
215	0	error,
216	0	LIBCERROR_ERROR_DOMAIN_RUNTIME,
217	0	LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
218	0	"%s: invalid string size value out of bounds.",
219	0	function );
220
221	0	goto on_error;
222	0	}
223	3.35k	string_end = segment_start + utf8_string_size;
224
225	3.35k	for( segment_index = 0;
226	271k	segment_index < number_of_segments;
227	268k	segment_index++ )
228	271k	{
229	271k	segment_end = segment_start;
230
231	5.61M	while( segment_end <= string_end )
232	5.61M	{
233	5.61M	if( ( segment_end == string_end )
234	5.61M	|| ( *segment_end == 0 ) )
235	3.35k	{
236	3.35k	segment_end = NULL;
237
238	3.35k	break;
239	3.35k	}
240	5.60M	else if( *segment_end == delimiter )
241	268k	{
242	268k	break;
243	268k	}
244	5.34M	segment_end++;
245	5.34M	}
246	271k	if( segment_end == NULL )
247	3.35k	{
248	3.35k	segment_length = (ssize_t) ( string_end - segment_start );
249	3.35k	}
250	268k	else
251	268k	{
252	268k	segment_length = (ssize_t) ( segment_end - segment_start );
253	268k	}
254	271k	if( segment_length >= 0 )
255	271k	{
256	271k	segment_start[ segment_length ] = 0;
257
258	271k	if( libfvalue_split_utf8_string_set_segment_by_index(
259	271k	*split_string,
260	271k	segment_index,
261	271k	segment_start,
262	271k	segment_length + 1,
263	271k	error ) != 1 )
264	0	{
265	0	libcerror_error_set(
266	0	error,
267	0	LIBCERROR_ERROR_DOMAIN_RUNTIME,
268	0	LIBCERROR_RUNTIME_ERROR_SET_FAILED,
269	0	"%s: unable to set split UTF-8 string segment: %d.",
270	0	function,
271	0	segment_index );
272
273	0	goto on_error;
274	0	}
275	271k	}
276	271k	if( segment_end == NULL )
277	3.35k	{
278	3.35k	break;
279	3.35k	}
280	268k	if( segment_end == string_end )
281	0	{
282	0	segment_start++;
283	0	}
284	268k	if( segment_end != string_end )
285	268k	{
286	268k	segment_start = segment_end + 1;
287	268k	}
288	268k	}
289	3.35k	return( 1 );
290
291	0	on_error:
292	0	if( *split_string != NULL )
293	0	{
294	0	libfvalue_split_utf8_string_free(
295	0	split_string,
296	0	NULL );
297	0	}
298	0	return( -1 );
299	3.35k	}
300