/src/libewf/libfvalue/libfvalue_utf8_string.c

Source (jump to first uncovered line)
/*
 * UTF-8 string value functions
 *
 * Copyright (C) 2010-2022, Joachim Metz <joachim.metz@gmail.com>
 *
 * Refer to AUTHORS for acknowledgements.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

#include <common.h>
#include <types.h>

#include "libfvalue_libcerror.h"
#include "libfvalue_split_utf8_string.h"
#include "libfvalue_types.h"

/* Scans one segment of an UTF-8 string for the delimiter
 *
 * segment_start must not be located after string_end. The byte at string_end
 * is always treated as the end of the string and is never compared against
 * the delimiter. A 0 byte in front of string_end ends the scan as well.
 *
 * If a delimiter is found segment_length is set to the number of bytes in
 * front of it. Otherwise segment_length is set to the number of bytes between
 * segment_start and string_end, also when the scan was stopped by an earlier
 * 0 byte.
 *
 * Returns 1 if a delimiter was found or 0 if the end of the string was reached
 */
static int libfvalue_utf8_string_split_scan_segment(
            const uint8_t *segment_start,
            const uint8_t *string_end,
            uint8_t delimiter,
            size_t *segment_length )
{
  const uint8_t *position = segment_start;

  /* The end of string check comes first, hence a delimiter value of 0
   * is never reported as a delimiter
   */
  while( ( position < string_end )
      && ( *position != 0 ) )
  {
    if( *position == delimiter )
    {
      *segment_length = (size_t) ( position - segment_start );

      return( 1 );
    }
    position++;
  }
  *segment_length = (size_t) ( string_end - segment_start );

  return( 0 );
}

/* Splits an UTF-8 string
 *
 * utf8_string_size is the size of the buffer in bytes. The last byte of the
 * buffer is always treated as the end of the string, whatever its value is,
 * and a 0 byte in front of it ends the string as well.
 *
 * If utf8_string_size is 0 or the first byte is 0 the function is successful
 * without creating a split string, *split_string remains NULL.
 *
 * Otherwise the split string is created by libfvalue_split_utf8_string_initialize
 * and every segment is registered with a size that includes its end of string
 * character. The delimiters are overwritten by 0 in the string returned by
 * libfvalue_split_utf8_string_get_string (presumably a copy owned by the split
 * string - verify against libfvalue_split_utf8_string_initialize).
 *
 * On error after the split string was created, it is freed again.
 *
 * Returns 1 if successful or -1 on error
 */
int libfvalue_utf8_string_split(
     const uint8_t *utf8_string,
     size_t utf8_string_size,
     uint8_t delimiter,
     libfvalue_split_utf8_string_t **split_string,
     libcerror_error_t **error )
{
  const uint8_t *scan_start = NULL;
  const uint8_t *scan_end   = NULL;
  uint8_t *segment_start    = NULL;
  uint8_t *string_end       = NULL;
  static char *function     = "libfvalue_utf8_string_split";
  size_t segment_count      = 0;
  size_t segment_length     = 0;
  size_t string_size        = 0;
  int delimiter_found       = 0;
  int number_of_segments    = 0;
  int segment_index         = 0;

  if( utf8_string == NULL )
  {
    libcerror_error_set(
     error,
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
     "%s: invalid UTF-8 string.",
     function );

    return( -1 );
  }
  if( utf8_string_size > (size_t) SSIZE_MAX )
  {
    libcerror_error_set(
     error,
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
     "%s: invalid UTF-8 string size value exceeds maximum.",
     function );

    return( -1 );
  }
  if( split_string == NULL )
  {
    libcerror_error_set(
     error,
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
     "%s: invalid split string.",
     function );

    return( -1 );
  }
  if( *split_string != NULL )
  {
    libcerror_error_set(
     error,
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
     LIBCERROR_RUNTIME_ERROR_VALUE_ALREADY_SET,
     "%s: invalid split string already set.",
     function );

    return( -1 );
  }
  /* An empty string has no segments
   */
  if( ( utf8_string_size == 0 )
   || ( utf8_string[ 0 ] == 0 ) )
  {
    return( 1 );
  }
  /* First pass: determine the number of segments
   * The input is only read here, hence it is accessed through const pointers
   */
  scan_start = utf8_string;
  scan_end   = &( utf8_string[ utf8_string_size - 1 ] );

  do
  {
    delimiter_found = libfvalue_utf8_string_split_scan_segment(
                       scan_start,
                       scan_end,
                       delimiter,
                       &segment_length );

    segment_count++;

    /* A delimiter is only found in front of scan_end,
     * hence the next segment starts at or before scan_end
     */
    if( delimiter_found != 0 )
    {
      scan_start = &( scan_start[ segment_length + 1 ] );
    }
  }
  while( delimiter_found != 0 );

  /* The segments are counted as size_t and converted afterwards, so that
   * a string with more delimiters than an int can hold is rejected instead
   * of overflowing a signed counter
   */
  number_of_segments = (int) segment_count;

  if( ( number_of_segments < 0 )
   || ( (size_t) number_of_segments != segment_count ) )
  {
    libcerror_error_set(
     error,
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
     LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
     "%s: invalid number of segments value out of bounds.",
     function );

    return( -1 );
  }
  if( libfvalue_split_utf8_string_initialize(
       split_string,
       utf8_string,
       utf8_string_size,
       number_of_segments,
       error ) != 1 )
  {
    libcerror_error_set(
     error,
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
     LIBCERROR_RUNTIME_ERROR_INITIALIZE_FAILED,
     "%s: unable to initialize split string.",
     function );

    goto on_error;
  }
  if( *split_string == NULL )
  {
    libcerror_error_set(
     error,
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
     LIBCERROR_RUNTIME_ERROR_VALUE_MISSING,
     "%s: missing split string.",
     function );

    goto on_error;
  }
  /* Second pass: determine the segments
   * empty segments are stored as strings only containing the end of character
   */
  if( libfvalue_split_utf8_string_get_string(
       *split_string,
       &segment_start,
       &string_size,
       error ) != 1 )
  {
    libcerror_error_set(
     error,
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
     LIBCERROR_RUNTIME_ERROR_GET_FAILED,
     "%s: unable to retrieve split UTF-8 string.",
     function );

    goto on_error;
  }
  if( segment_start == NULL )
  {
    libcerror_error_set(
     error,
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
     LIBCERROR_RUNTIME_ERROR_VALUE_MISSING,
     "%s: missing segment start.",
     function );

    goto on_error;
  }
  if( string_size < 1 )
  {
    libcerror_error_set(
     error,
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
     LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
     "%s: invalid string size value out of bounds.",
     function );

    goto on_error;
  }
  string_end = &( segment_start[ string_size - 1 ] );

  for( segment_index = 0;
       segment_index < number_of_segments;
       segment_index++ )
  {
    delimiter_found = libfvalue_utf8_string_split_scan_segment(
                       segment_start,
                       string_end,
                       delimiter,
                       &segment_length );

    /* Terminate the segment in place, this either overwrites the delimiter
     * or the last byte of the string
     */
    segment_start[ segment_length ] = 0;

    /* The segment size includes the end of string character
     */
    if( libfvalue_split_utf8_string_set_segment_by_index(
         *split_string,
         segment_index,
         segment_start,
         segment_length + 1,
         error ) != 1 )
    {
      libcerror_error_set(
       error,
       LIBCERROR_ERROR_DOMAIN_RUNTIME,
       LIBCERROR_RUNTIME_ERROR_SET_FAILED,
       "%s: unable to set split UTF-8 string segment: %d.",
       function,
       segment_index );

      goto on_error;
    }
    if( delimiter_found == 0 )
    {
      break;
    }
    /* Continue directly after the overwritten delimiter
     */
    segment_start = &( segment_start[ segment_length + 1 ] );
  }
  return( 1 );

on_error:
  if( *split_string != NULL )
  {
    libfvalue_split_utf8_string_free(
     split_string,
     NULL );
  }
  return( -1 );
}


Coverage Report

Created: 2023-06-07 06:53

Line	Count	Source (jump to first uncovered line)
1		/*
2		* UTF-8 string value functions
3		*
4		* Copyright (C) 2010-2022, Joachim Metz <joachim.metz@gmail.com>
5		*
6		* Refer to AUTHORS for acknowledgements.
7		*
8		* This program is free software: you can redistribute it and/or modify
9		* it under the terms of the GNU Lesser General Public License as published by
10		* the Free Software Foundation, either version 3 of the License, or
11		* (at your option) any later version.
12		*
13		* This program is distributed in the hope that it will be useful,
14		* but WITHOUT ANY WARRANTY; without even the implied warranty of
15		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16		* GNU General Public License for more details.
17		*
18		* You should have received a copy of the GNU Lesser General Public License
19		* along with this program. If not, see <https://www.gnu.org/licenses/>.
20		*/
21
22		#include <common.h>
23		#include <types.h>
24
25		#include "libfvalue_libcerror.h"
26		#include "libfvalue_split_utf8_string.h"
27		#include "libfvalue_types.h"
28
29		/* Splits an UTF-8 string
30		* Returns 1 if successful or -1 on error
31		*/
32		int libfvalue_utf8_string_split(
33		const uint8_t *utf8_string,
34		size_t utf8_string_size,
35		uint8_t delimiter,
36		libfvalue_split_utf8_string_t **split_string,
37		libcerror_error_t **error )
38	2.97k	{
39	2.97k	uint8_t *segment_start = NULL;
40	2.97k	uint8_t *segment_end = NULL;
41	2.97k	uint8_t *string_end = NULL;
42	2.97k	static char *function = "libfvalue_utf8_string_split";
43	2.97k	size_t string_size = 0;
44	2.97k	ssize_t segment_length = 0;
45	2.97k	int number_of_segments = 0;
46	2.97k	int segment_index = 0;
47
48	2.97k	if( utf8_string == NULL )
49	0	{
50	0	libcerror_error_set(
51	0	error,
52	0	LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
53	0	LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
54	0	"%s: invalid UTF-8 string.",
55	0	function );
56
57	0	return( -1 );
58	0	}
59	2.97k	if( utf8_string_size > (size_t) SSIZE_MAX )
60	0	{
61	0	libcerror_error_set(
62	0	error,
63	0	LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
64	0	LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
65	0	"%s: invalid UTF-8 string size value exceeds maximum.",
66	0	function );
67
68	0	return( -1 );
69	0	}
70	2.97k	if( split_string == NULL )
71	0	{
72	0	libcerror_error_set(
73	0	error,
74	0	LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
75	0	LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
76	0	"%s: invalid split string.",
77	0	function );
78
79	0	return( -1 );
80	0	}
81	2.97k	if( *split_string != NULL )
82	0	{
83	0	libcerror_error_set(
84	0	error,
85	0	LIBCERROR_ERROR_DOMAIN_RUNTIME,
86	0	LIBCERROR_RUNTIME_ERROR_VALUE_ALREADY_SET,
87	0	"%s: invalid split string already set.",
88	0	function );
89
90	0	return( -1 );
91	0	}
92		/* An empty string has no segments
93		*/
94	2.97k	if( ( utf8_string_size == 0 )
95	2.97k	|| ( utf8_string[ 0 ] == 0 ) )
96	8	{
97	8	return( 1 );
98	8	}
99		/* Determine the number of segments
100		*/
101	2.96k	segment_start = (uint8_t *) utf8_string;
102	2.96k	string_end = (uint8_t *) &( utf8_string[ utf8_string_size - 1 ] );
103
104	2.96k	do
105	268k	{
106	268k	segment_end = segment_start;
107
108	5.59M	while( segment_end <= string_end )
109	5.59M	{
110	5.59M	if( ( segment_end == string_end )
111	5.59M	|| ( *segment_end == 0 ) )
112	2.96k	{
113	2.96k	segment_end = NULL;
114
115	2.96k	break;
116	2.96k	}
117	5.59M	else if( *segment_end == delimiter )
118	265k	{
119	265k	break;
120	265k	}
121	5.32M	segment_end++;
122	5.32M	}
123	268k	if( segment_end > string_end )
124	0	{
125	0	break;
126	0	}
127	268k	segment_index++;
128
129	268k	if( segment_end == NULL )
130	2.96k	{
131	2.96k	break;
132	2.96k	}
133	265k	if( segment_end == segment_start )
134	7.05k	{
135	7.05k	segment_start++;
136	7.05k	}
137	258k	else if( segment_end != utf8_string )
138	258k	{
139	258k	segment_start = segment_end + 1;
140	258k	}
141	265k	}
142	265k	while( segment_end != NULL );
143
144	0	number_of_segments = segment_index;
145
146	2.96k	if( libfvalue_split_utf8_string_initialize(
147	2.96k	split_string,
148	2.96k	utf8_string,
149	2.96k	utf8_string_size,
150	2.96k	number_of_segments,
151	2.96k	error ) != 1 )
152	0	{
153	0	libcerror_error_set(
154	0	error,
155	0	LIBCERROR_ERROR_DOMAIN_RUNTIME,
156	0	LIBCERROR_RUNTIME_ERROR_INITIALIZE_FAILED,
157	0	"%s: unable to initialize split string.",
158	0	function );
159
160	0	goto on_error;
161	0	}
162	2.96k	if( *split_string == NULL )
163	0	{
164	0	libcerror_error_set(
165	0	error,
166	0	LIBCERROR_ERROR_DOMAIN_RUNTIME,
167	0	LIBCERROR_RUNTIME_ERROR_VALUE_MISSING,
168	0	"%s: missing split string.",
169	0	function );
170
171	0	goto on_error;
172	0	}
173		/* Do not bother splitting empty strings
174		*/
175	2.96k	if( number_of_segments == 0 )
176	0	{
177	0	return( 1 );
178	0	}
179		/* Determine the segments
180		* empty segments are stored as strings only containing the end of character
181		*/
182	2.96k	if( libfvalue_split_utf8_string_get_string(
183	2.96k	*split_string,
184	2.96k	&segment_start,
185	2.96k	&string_size,
186	2.96k	error ) != 1 )
187	0	{
188	0	libcerror_error_set(
189	0	error,
190	0	LIBCERROR_ERROR_DOMAIN_RUNTIME,
191	0	LIBCERROR_RUNTIME_ERROR_GET_FAILED,
192	0	"%s: unable to retrieve split UTF-8 string.",
193	0	function );
194
195	0	goto on_error;
196	0	}
197	2.96k	if( segment_start == NULL )
198	0	{
199	0	libcerror_error_set(
200	0	error,
201	0	LIBCERROR_ERROR_DOMAIN_RUNTIME,
202	0	LIBCERROR_RUNTIME_ERROR_VALUE_MISSING,
203	0	"%s: missing segment start.",
204	0	function );
205
206	0	goto on_error;
207	0	}
208	2.96k	if( string_size < 1 )
209	0	{
210	0	libcerror_error_set(
211	0	error,
212	0	LIBCERROR_ERROR_DOMAIN_RUNTIME,
213	0	LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
214	0	"%s: invalid string size value out of bounds.",
215	0	function );
216
217	0	goto on_error;
218	0	}
219	2.96k	string_end = &( segment_start[ string_size - 1 ] );
220
221	2.96k	for( segment_index = 0;
222	268k	segment_index < number_of_segments;
223	265k	segment_index++ )
224	268k	{
225	268k	segment_end = segment_start;
226
227	5.59M	while( segment_end <= string_end )
228	5.59M	{
229	5.59M	if( ( segment_end == string_end )
230	5.59M	|| ( *segment_end == 0 ) )
231	2.96k	{
232	2.96k	segment_end = NULL;
233
234	2.96k	break;
235	2.96k	}
236	5.59M	else if( *segment_end == delimiter )
237	265k	{
238	265k	break;
239	265k	}
240	5.32M	segment_end++;
241	5.32M	}
242	268k	if( segment_end == NULL )
243	2.96k	{
244	2.96k	segment_length = (ssize_t) ( string_end - segment_start );
245	2.96k	}
246	265k	else
247	265k	{
248	265k	segment_length = (ssize_t) ( segment_end - segment_start );
249	265k	}
250	268k	if( segment_length >= 0 )
251	268k	{
252	268k	segment_start[ segment_length ] = 0;
253
254	268k	if( libfvalue_split_utf8_string_set_segment_by_index(
255	268k	*split_string,
256	268k	segment_index,
257	268k	segment_start,
258	268k	segment_length + 1,
259	268k	error ) != 1 )
260	0	{
261	0	libcerror_error_set(
262	0	error,
263	0	LIBCERROR_ERROR_DOMAIN_RUNTIME,
264	0	LIBCERROR_RUNTIME_ERROR_SET_FAILED,
265	0	"%s: unable to set split UTF-8 string segment: %d.",
266	0	function,
267	0	segment_index );
268
269	0	goto on_error;
270	0	}
271	268k	}
272	268k	if( segment_end == NULL )
273	2.96k	{
274	2.96k	break;
275	2.96k	}
276	265k	if( segment_end == string_end )
277	0	{
278	0	segment_start++;
279	0	}
280	265k	if( segment_end != string_end )
281	265k	{
282	265k	segment_start = segment_end + 1;
283	265k	}
284	265k	}
285	2.96k	return( 1 );
286
287	0	on_error:
288	0	if( *split_string != NULL )
289	0	{
290	0	libfvalue_split_utf8_string_free(
291	0	split_string,
292	0	NULL );
293	0	}
294	0	return( -1 );
295	2.96k	}
296