/src/tinysparql/src/common/tracker-utils.c

Source
/*
 * Copyright (C) 2006, Jamie McCracken <jamiemcc@gnome.org>
 * Copyright (C) 2008, Nokia <ivan.frade@nokia.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA  02110-1301, USA.
 */

#include "config.h"

#include <stdio.h>
#include <string.h>
#include <locale.h>

#include <glib.h>
#include <glib/gi18n.h>

#include "tracker-utils.h"

/**
 * tracker_strhex:
 * @data: The input array of bytes
 * @size: Number of bytes in the input array
 * @delimiter: Character to use as separator between each printed byte
 *
 * Returns the contents of @data as a printable string in uppercase
 * hexadecimal representation, two digits per byte, with @delimiter
 * placed between consecutive bytes (but not after the last one).
 * An empty input (@size of 0) yields an empty string.
 *
 * Based on GNU PDF's pdf_text_test_get_hex()
 *
 * Returns: A newly allocated string which should be disposed with g_free()
 **/
gchar *
tracker_strhex (const guint8 *data,
                gsize         size,
                gchar         delimiter)
{
  static const gchar hex_digits[] = "0123456789ABCDEF";
  gchar *new_str;
  gchar *out;
  gsize i;

  /* g_malloc0 (0) returns NULL; hand back a real (empty) string so the
   * "newly allocated string" contract also holds for empty input. */
  if (size == 0)
    return g_strdup ("");

  /* If the input has N bytes, we need:
   * - 2N bytes for the hexadecimal representation of each byte
   * - N-1 bytes for the separators
   * - 1 byte for the trailing NUL
   * So a total of 3N bytes. g_malloc0_n() checks the multiplication
   * for overflow instead of silently wrapping around. */
  new_str = g_malloc0_n (size, 3);
  out = new_str;

  for (i = 0; i < size; i++) {
    /* Separator goes between bytes only */
    if (i > 0)
      *out++ = delimiter;

    *out++ = hex_digits[data[i] >> 4];
    *out++ = hex_digits[data[i] & 0x0F];
  }

  /* The buffer was zero-initialized, so the last byte is already NUL */
  return new_str;
}

/**
 * tracker_utf8_truncate:
 * @str: Nul-terminated input string
 * @max_size: Maximum length of the output string, in characters
 *
 * Returns @str unchanged if it is at most @max_size characters long.
 * Otherwise returns its first (@max_size - 3) characters followed by
 * "[…]", so that the result is @max_size characters long. When
 * @max_size is smaller than 3 there is no room for any prefix and the
 * result is just "[…]".
 *
 * If @str is not valid UTF-8, the fixed string "[Invalid UTF-8]" is
 * returned instead.
 *
 * Returns: A newly allocated string which should be disposed with g_free()
 */
gchar *
tracker_utf8_truncate (const gchar  *str,
                       gsize         max_size)
{
  /* Number of characters taken by the "[…]" marker */
  const gsize marker_chars = 3;
  gsize keep_chars;
  gchar *substring;
  gchar *retv;

  if (!g_utf8_validate (str, -1, NULL))
    return g_strdup ("[Invalid UTF-8]");

  if ((gsize) g_utf8_strlen (str, -1) <= max_size)
    return g_strdup (str);

  /* max_size is unsigned: subtracting the marker length from a value
   * smaller than it would wrap around, so clamp to an empty prefix. */
  keep_chars = (max_size > marker_chars) ? max_size - marker_chars : 0;

  /* keep_chars is below the character count of str, so it fits a glong */
  substring = g_utf8_substring (str, 0, (glong) keep_chars);
  retv = g_strdup_printf ("%s[…]", substring);
  g_free (substring);

  return retv;
}

/*
 * range_is_xdigit:
 * @str: Input string
 * @start: Index of the first character to check
 * @end: Index one past the last character to check, must be > @start
 *
 * Checks whether every character of @str in [@start, @end) is an
 * ASCII hexadecimal digit.
 *
 * Returns: TRUE if all characters in the range are hex digits
 */
static gboolean
range_is_xdigit (const gchar *str,
                 gssize       start,
                 gssize       end)
{
  const gchar *cursor;
  const gchar *limit;

  g_assert (end > start);

  cursor = str + start;
  limit = str + end;

  while (cursor < limit) {
    if (!g_ascii_isxdigit (*cursor))
      return FALSE;
    cursor++;
  }

  return TRUE;
}

/*
 * xdigit_to_unichar:
 * @str: Input string
 * @start: Index of the first hexadecimal digit
 * @end: Index one past the last hexadecimal digit, must be > @start
 *
 * Accumulates the hexadecimal digits of @str in [@start, @end), most
 * significant digit first, into a single value.
 *
 * Returns: the accumulated value as a #gunichar
 */
static gunichar
xdigit_to_unichar (const gchar *str,
                   gssize       start,
                   gssize       end)
{
  gunichar value = 0;
  gssize pos = start;

  g_assert (end > start);

  /* Make room for the next nibble, then merge it in */
  while (pos < end) {
    value = (value << 4) | g_ascii_xdigit_value (str[pos]);
    pos++;
  }

  return value;
}

/*
 * tracker_unescape_unichars:
 * @str: Input string
 * @len: Length of @str in bytes, or a negative value to use strlen()
 *
 * Replaces each backslash-u sequence followed by 4 hexadecimal digits,
 * and each backslash-U sequence followed by 8 hexadecimal digits, with
 * the character it encodes. Any other backslash escape is copied
 * together with the character following it, and unicode escapes that
 * are too short or contain non-hexadecimal digits are copied through
 * unchanged.
 *
 * Returns: a newly allocated string with the well-formed unicode
 * escapes replaced
 */
gchar *
tracker_unescape_unichars (const gchar  *str,
                           gssize        len)
{
  GString *result;
  gssize pos = 0;

  if (len < 0)
    len = strlen (str);

  result = g_string_new (NULL);

  while (pos < len) {
    const gchar *cur = &str[pos];
    gssize remaining = len - pos;

    if (cur[0] == '\\' && remaining >= 2) {
      gssize seq_len;

      switch (cur[1]) {
      case 'u':
        /* Backslash, 'u' and 4 hex digits */
        seq_len = 6;
        break;
      case 'U':
        /* Backslash, 'U' and 8 hex digits */
        seq_len = 10;
        break;
      default:
        /* Not an unicode escape sequence, keep both characters */
        g_string_append_c (result, cur[0]);
        g_string_append_c (result, cur[1]);
        pos += 2;
        continue;
      }

      if (remaining >= seq_len &&
          range_is_xdigit (cur, 2, seq_len)) {
        g_string_append_unichar (result,
                                 xdigit_to_unichar (cur, 2, seq_len));
        pos += seq_len;
        continue;
      }
    }

    /* Plain character, trailing backslash or malformed escape */
    g_string_append_c (result, cur[0]);
    pos++;
  }

  return g_string_free (result, FALSE);
}

/*
 * parse_abs_uri:
 * @uri: NUL-terminated string to parse
 * @base: (out): newly allocated copy of the leading part of @uri, to
 *   be freed with g_free(). This is the scheme alone, or the scheme
 *   plus "://" and the authority when @uri has that form.
 * @rel_path: (out): pointer into @uri (not a copy) at the text that
 *   follows the separator after @base, or at the terminating NUL when
 *   nothing follows
 *
 * Splits an absolute URI into its base and the remaining path. The
 * scheme must start with an ASCII letter, consist of ASCII letters,
 * '+', '-' or '.', and be terminated by ':'.
 *
 * Returns: TRUE if @uri was parsed; @base and @rel_path are only set
 * in that case
 */
gboolean
parse_abs_uri (const gchar  *uri,
               gchar       **base,
               const gchar **rel_path)
{
  const gchar *loc, *end;

  end = &uri[strlen (uri)];
  loc = uri;

  if (!g_ascii_isalpha (loc[0]))
    return FALSE;

  while (loc != end) {
    if (loc[0] == ':')
      break;
    if (!g_ascii_isalpha (loc[0]) &&
        loc[0] != '+' && loc[0] != '-' && loc[0] != '.')
      return FALSE;
    loc++;
  }

  /* Reached the end without finding the ':' that terminates the
   * scheme, so this is not an absolute URI. */
  if (loc == end)
    return FALSE;

  if (strncmp (loc, "://", 3) == 0) {
    /* Include authority in base */
    loc += 3;
    loc = strchr (loc, '/');
    if (!loc)
      loc = end;
  }

  *base = g_strndup (uri, loc - uri);

  /* Skip the separator (':' or '/') following the base, but never step
   * past the terminating NUL when the URI ends right after the
   * authority. */
  *rel_path = (loc != end) ? loc + 1 : end;

  return TRUE;
}

/*
 * remove_dot_segments:
 * @uri_elems: NULL-terminated array of path segments
 *
 * Builds the list of segments that remain after dropping "." and empty
 * segments, and letting each ".." segment remove the segment that
 * precedes it (if there is one).
 *
 * The returned array borrows the strings from @uri_elems, it does not
 * copy them.
 *
 * Returns: a new #GPtrArray holding the remaining segments
 */
GPtrArray *
remove_dot_segments (gchar **uri_elems)
{
  GPtrArray *segments = g_ptr_array_new ();
  gchar **elem;

  for (elem = uri_elems; *elem != NULL; elem++) {
    gchar *segment = *elem;

    /* Segments that contribute nothing to the path */
    if (segment[0] == '\0' || g_strcmp0 (segment, ".") == 0)
      continue;

    if (g_strcmp0 (segment, "..") != 0) {
      /* NB: Not a copy */
      g_ptr_array_add (segments, segment);
    } else if (segments->len > 0) {
      /* Step back one level */
      g_ptr_array_remove_index (segments, segments->len - 1);
    }
  }

  return segments;
}

/*
 * tracker_resolve_relative_uri:
 * @base: Base URI to resolve against
 * @rel_uri: URI to resolve
 *
 * Resolves @rel_uri against @base. If the first '/'-separated segment
 * of @rel_uri contains a ':', or if @base cannot be parsed as an
 * absolute URI, a copy of @rel_uri is returned unchanged. Otherwise
 * the path segments of @base and @rel_uri are concatenated, with ".",
 * ".." and empty segments resolved on each side beforehand, and joined
 * with '/' after the scheme/authority part of @base. An empty @rel_uri
 * therefore resolves to the normalized @base.
 *
 * Returns: A newly allocated string which should be disposed with g_free()
 */
gchar *
tracker_resolve_relative_uri (const gchar  *base,
                              const gchar  *rel_uri)
{
  gchar **base_split, **rel_split, *host;
  GPtrArray *base_norm, *rel_norm;
  GString *str;
  guint i;

  /* Relative IRIs are combined with base IRIs with a simplified version
   * of the algorithm described at RFC3986, Section 5.2. We don't care
   * about query and fragment parts of an URI, and some simplifications
   * are taken on base uri parsing and relative uri validation.
   */
  rel_split = g_strsplit (rel_uri, "/", -1);

  /* Rel uri is a full uri? Splitting an empty string yields an empty
   * vector, so the first element may be NULL and must not be handed
   * to strchr(). */
  if (rel_split[0] != NULL && strchr (rel_split[0], ':')) {
    g_strfreev (rel_split);
    return g_strdup (rel_uri);
  }

  /* On success, base is re-pointed at the path part of the base URI */
  if (!parse_abs_uri (base, &host, &base)) {
    g_strfreev (rel_split);
    return g_strdup (rel_uri);
  }

  base_split = g_strsplit (base, "/", -1);

  base_norm = remove_dot_segments (base_split);
  rel_norm = remove_dot_segments (rel_split);

  /* The arrays only borrow the strings owned by base_split/rel_split */
  for (i = 0; i < rel_norm->len; i++) {
    g_ptr_array_add (base_norm,
                     g_ptr_array_index (rel_norm, i));
  }

  str = g_string_new (host);
  for (i = 0; i < base_norm->len; i++) {
    g_string_append_c (str, '/');
    g_string_append (str,
                     g_ptr_array_index (base_norm, i));
  }

  g_ptr_array_unref (base_norm);
  g_ptr_array_unref (rel_norm);
  g_strfreev (base_split);
  g_strfreev (rel_split);
  g_free (host);

  return g_string_free (str, FALSE);
}

/*
 * tracker_util_parse_dbus_uri:
 * @uri: URI to parse, must not be NULL
 * @bus_type: (out): bus selected by the URI
 * @service: (out): newly allocated service name, free with g_free()
 * @path: (out): newly allocated object path (starting at the '/'), or
 *   NULL if the URI has no ":/" separator; free with g_free()
 *
 * Parses URIs of the form "dbus:[system:|session:]SERVICE[:/PATH]".
 * When neither "system:" nor "session:" is given, the session bus is
 * assumed.
 *
 * Returns: FALSE if @uri does not start with "dbus:" (the output
 * arguments are left untouched), TRUE otherwise
 */
gboolean
tracker_util_parse_dbus_uri (const gchar  *uri,
                             GBusType     *bus_type,
                             gchar       **service,
                             gchar       **path)
{
  const gchar *rest;
  const gchar *path_sep;
  GBusType bus;

  g_assert (uri != NULL);

  if (!g_str_has_prefix (uri, "dbus:"))
    return FALSE;

  rest = uri + strlen ("dbus:");

  /* Session bus is both an explicit option and the default */
  bus = G_BUS_TYPE_SESSION;

  if (g_str_has_prefix (rest, "system:")) {
    bus = G_BUS_TYPE_SYSTEM;
    rest += strlen ("system:");
  } else if (g_str_has_prefix (rest, "session:")) {
    rest += strlen ("session:");
  }

  *bus_type = bus;

  path_sep = strstr (rest, ":/");

  if (path_sep == NULL) {
    *service = g_strdup (rest);
    *path = NULL;
    return TRUE;
  }

  /* Service is everything before the ':', path starts at the '/' */
  *service = g_strndup (rest, path_sep - rest);
  *path = g_strdup (path_sep + 1);

  return TRUE;
}

/*
 * tracker_util_build_dbus_uri:
 * @bus_type: G_BUS_TYPE_SESSION or G_BUS_TYPE_SYSTEM
 * @service: D-Bus name, checked with g_dbus_is_name()
 * @path: (nullable): object path, must start with '/' when given
 *
 * Builds a "dbus:SERVICE[:PATH]" URI for the session bus, or a
 * "dbus:system:SERVICE[:PATH]" URI for the system bus.
 *
 * Returns: a newly allocated string to be freed with g_free(), or NULL
 * if @service is not accepted by g_dbus_is_name(), @path does not
 * start with '/', or @bus_type is any other bus type
 */
gchar *
tracker_util_build_dbus_uri (GBusType     bus_type,
                             const gchar *service,
                             const gchar *path)
{
  const gchar *prefix;
  GString *uri;

  if (!g_dbus_is_name (service))
    return NULL;
  if (path != NULL && path[0] != '/')
    return NULL;

  switch (bus_type) {
  case G_BUS_TYPE_SESSION:
    prefix = "dbus:";
    break;
  case G_BUS_TYPE_SYSTEM:
    prefix = "dbus:system:";
    break;
  default:
    return NULL;
  }

  uri = g_string_new (prefix);
  g_string_append (uri, service);

  if (path != NULL) {
    g_string_append_c (uri, ':');
    g_string_append (uri, path);
  }

  return g_string_free (uri, FALSE);
}

Coverage Report

Created: 2025-11-09 06:54

Line	Count	Source
1		/*
2		* Copyright (C) 2006, Jamie McCracken <jamiemcc@gnome.org>
3		* Copyright (C) 2008, Nokia <ivan.frade@nokia.com>
4		*
5		* This library is free software; you can redistribute it and/or
6		* modify it under the terms of the GNU Lesser General Public
7		* License as published by the Free Software Foundation; either
8		* version 2.1 of the License, or (at your option) any later version.
9		*
10		* This library is distributed in the hope that it will be useful,
11		* but WITHOUT ANY WARRANTY; without even the implied warranty of
12		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13		* Lesser General Public License for more details.
14		*
15		* You should have received a copy of the GNU Lesser General Public
16		* License along with this library; if not, write to the
17		* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18		* Boston, MA 02110-1301, USA.
19		*/
20
21		#include "config.h"
22
23		#include <stdio.h>
24		#include <string.h>
25		#include <locale.h>
26
27		#include <glib.h>
28		#include <glib/gi18n.h>
29
30		#include "tracker-utils.h"
31
32		/**
33		* tracker_strhex:
34		* @data: The input array of bytes
35		* @size: Number of bytes in the input array
36		* @delimiter: Character to use as separator between each printed byte
37		*
38		* Returns the contents of @data as a printable string in hexadecimal
39		* representation.
40		*
41		* Based on GNU PDF's pdf_text_test_get_hex()
42		*
43		* Returns: A newly allocated string which should be disposed with g_free()
44		**/
45		gchar *
46		tracker_strhex (const guint8 *data,
47		gsize size,
48		gchar delimiter)
49	0	{
50	0	gsize i;
51	0	gsize j;
52	0	gsize new_str_length;
53	0	gchar *new_str;
54
55		/* Get new string length. If input string has N bytes, we need:
56		* - 1 byte for last NUL char
57		* - 2N bytes for hexadecimal char representation of each byte...
58		* - N-1 bytes for the separator ':'
59		* So... a total of (1+2N+N-1) = 3N bytes are needed... */
60	0	new_str_length = 3 * size;
61
62		/* Allocate memory for new array and initialize contents to NUL */
63	0	new_str = g_malloc0 (new_str_length);
64
65		/* Print hexadecimal representation of each byte... */
66	0	for(i=0, j=0; i<size; i++, j+=3) {
67		/* Print character in output string... */
68	0	snprintf (&new_str[j], 3, "%02X", data[i]);
69
70		/* And if needed, add separator */
71	0	if(i != (size-1) ) {
72	0	new_str[j+2] = delimiter;
73	0	}
74	0	}
75
76		/* Set output string */
77	0	return new_str;
78	0	}
79
80		/**
81		* tracker_utf8_truncate:
82		* @str: Nul-terminated input string
83		* @max_size: Maximum length of the output string
84		*
85		* Returns up to @max_size characters long substring of @str, followed
86		* with "[…]" when actually truncated.
87		*
88		* Returns: A newly allocated string which should be disposed with g_free()
89		*/
90		gchar *
91		tracker_utf8_truncate (const gchar *str,
92		gsize max_size)
93	0	{
94	0	gchar *retv = NULL;
95
96	0	if (!g_utf8_validate (str, -1, NULL)) {
97	0	retv = g_strdup ("[Invalid UTF-8]");
98	0	} else if ((gsize) g_utf8_strlen (str, -1) > max_size) {
99	0	gchar *substring = g_utf8_substring (str, 0, max_size - 3);
100	0	retv = g_strdup_printf ("%s[…]", substring);
101	0	g_free (substring);
102	0	} else {
103	0	retv = g_strdup (str);
104	0	}
105
106	0	return retv;
107	0	}
108
109		static gboolean
110		range_is_xdigit (const gchar *str,
111		gssize start,
112		gssize end)
113	0	{
114	0	gssize i;
115
116	0	g_assert (end > start);
117
118	0	for (i = start; i < end; i++) {
119	0	if (!g_ascii_isxdigit (str[i]))
120	0	return FALSE;
121	0	}
122
123	0	return TRUE;
124	0	}
125
126		static gunichar
127		xdigit_to_unichar (const gchar *str,
128		gssize start,
129		gssize end)
130	0	{
131	0	gunichar ch = 0;
132	0	gssize i;
133
134	0	g_assert (end > start);
135
136	0	for (i = start; i < end; i++) {
137	0	ch \|= g_ascii_xdigit_value (str[i]);
138	0	if (i < end - 1)
139	0	ch <<= 4;
140	0	}
141
142	0	return ch;
143	0	}
144
145		/*
146		* tracker_unescape_unichars:
147		* @str: Input string
148		* @len: Length
149		*
150		* Unescapes \u and \U sequences into their respective unichars.
151		*
152		* Returns: a string with no \u nor \U sequences
153		*/
154		gchar *
155		tracker_unescape_unichars (const gchar *str,
156		gssize len)
157	0	{
158	0	GString *copy;
159	0	gunichar ch;
160	0	gssize i = 0;
161
162	0	if (len < 0)
163	0	len = strlen (str);
164
165	0	copy = g_string_new (NULL);
166
167	0	while (i < len) {
168	0	if (len - i >= 2 &&
169	0	str[i] == '\\' &&
170	0	g_ascii_tolower (str[i + 1]) != 'u') {
171		/* Not an unicode escape sequence */
172	0	g_string_append_c (copy, str[i]);
173	0	g_string_append_c (copy, str[i + 1]);
174	0	i += 2;
175	0	} else if (len - i >= 6 &&
176	0	strncmp (&str[i], "\\u", 2) == 0 &&
177	0	range_is_xdigit (&str[i], 2, 6)) {
178	0	ch = xdigit_to_unichar (&str[i], 2, 6);
179	0	g_string_append_unichar (copy, ch);
180	0	i += 6;
181	0	} else if (len - i >= 10 &&
182	0	strncmp (&str[i], "\\U", 2) == 0 &&
183	0	range_is_xdigit (&str[i], 2, 10)) {
184	0	ch = xdigit_to_unichar (&str[i], 2, 10);
185	0	g_string_append_unichar (copy, ch);
186	0	i += 10;
187	0	} else {
188	0	g_string_append_c (copy, str[i]);
189	0	i++;
190	0	}
191	0	}
192
193	0	return g_string_free (copy, FALSE);
194	0	}
195
196		gboolean
197		parse_abs_uri (const gchar *uri,
198		gchar **base,
199		const gchar **rel_path)
200	0	{
201	0	const gchar loc, end;
202
203	0	end = &uri[strlen (uri)];
204	0	loc = uri;
205
206	0	if (!g_ascii_isalpha (loc[0]))
207	0	return FALSE;
208
209	0	while (loc != end) {
210	0	if (loc[0] == ':')
211	0	break;
212	0	if (!g_ascii_isalpha (loc[0]) &&
213	0	loc[0] != '+' && loc[0] != '-' && loc[0] != '.')
214	0	return FALSE;
215	0	loc++;
216	0	}
217
218	0	if (loc == uri)
219	0	return FALSE;
220
221	0	if (strncmp (loc, "://", 3) == 0) {
222		/* Include authority in base */
223	0	loc += 3;
224	0	loc = strchr (loc, '/');
225	0	if (!loc)
226	0	loc = end;
227	0	}
228
229	0	*base = g_strndup (uri, loc - uri);
230	0	*rel_path = loc + 1;
231
232	0	return TRUE;
233	0	}
234
235		GPtrArray *
236		remove_dot_segments (gchar **uri_elems)
237	0	{
238	0	GPtrArray *array;
239	0	gint i;
240
241	0	array = g_ptr_array_new ();
242
243	0	for (i = 0; uri_elems[i] != NULL; i++) {
244	0	if (g_strcmp0 (uri_elems[i], ".") == 0) {
245	0	continue;
246	0	} else if (g_strcmp0 (uri_elems[i], "..") == 0) {
247	0	if (array->len > 0)
248	0	g_ptr_array_remove_index (array, array->len - 1);
249	0	continue;
250	0	} else if (*uri_elems[i] != '\0') {
251		/* NB: Not a copy */
252	0	g_ptr_array_add (array, uri_elems[i]);
253	0	}
254	0	}
255
256	0	return array;
257	0	}
258
259		gchar *
260		tracker_resolve_relative_uri (const gchar *base,
261		const gchar *rel_uri)
262	0	{
263	0	gchar base_split, rel_split, *host;
264	0	GPtrArray base_norm, rel_norm;
265	0	GString *str;
266	0	guint i;
267
268		/* Relative IRIs are combined with base IRIs with a simplified version
269		* of the algorithm described at RFC3986, Section 5.2. We don't care
270		* about query and fragment parts of an URI, and some simplifications
271		* are taken on base uri parsing and relative uri validation.
272		*/
273	0	rel_split = g_strsplit (rel_uri, "/", -1);
274
275		/* Rel uri is a full uri? */
276	0	if (strchr (rel_split[0], ':')) {
277	0	g_strfreev (rel_split);
278	0	return g_strdup (rel_uri);
279	0	}
280
281	0	if (!parse_abs_uri (base, &host, &base)) {
282	0	g_strfreev (rel_split);
283	0	return g_strdup (rel_uri);
284	0	}
285
286	0	base_split = g_strsplit (base, "/", -1);
287
288	0	base_norm = remove_dot_segments (base_split);
289	0	rel_norm = remove_dot_segments (rel_split);
290
291	0	for (i = 0; i < rel_norm->len; i++) {
292	0	g_ptr_array_add (base_norm,
293	0	g_ptr_array_index (rel_norm, i));
294	0	}
295
296	0	str = g_string_new (host);
297	0	for (i = 0; i < base_norm->len; i++) {
298	0	g_string_append_c (str, '/');
299	0	g_string_append (str,
300	0	g_ptr_array_index (base_norm, i));
301	0	}
302
303	0	g_ptr_array_unref (base_norm);
304	0	g_ptr_array_unref (rel_norm);
305	0	g_strfreev (base_split);
306	0	g_strfreev (rel_split);
307	0	g_free (host);
308
309	0	return g_string_free (str, FALSE);
310	0	}
311
312		gboolean
313		tracker_util_parse_dbus_uri (const gchar *uri,
314		GBusType *bus_type,
315		gchar **service,
316		gchar **path)
317	0	{
318	0	const gchar *separator;
319
320	0	g_assert (uri != NULL);
321
322	0	if (!g_str_has_prefix (uri, "dbus:"))
323	0	return FALSE;
324
325	0	uri += strlen ("dbus:");
326
327	0	if (g_str_has_prefix (uri, "system:")) {
328	0	*bus_type = G_BUS_TYPE_SYSTEM;
329	0	uri += strlen ("system:");
330	0	} else if (g_str_has_prefix (uri, "session:")) {
331	0	*bus_type = G_BUS_TYPE_SESSION;
332	0	uri += strlen ("session:");
333	0	} else {
334		/* Fall back to session bus by default */
335	0	*bus_type = G_BUS_TYPE_SESSION;
336	0	}
337
338	0	separator = strstr (uri, ":/");
339
340	0	if (separator) {
341	0	*service = g_strndup (uri, separator - uri);
342	0	separator += 1;
343	0	*path = g_strdup (separator);
344	0	} else {
345	0	*service = g_strdup (uri);
346	0	*path = NULL;
347	0	}
348
349	0	return TRUE;
350	0	}
351
352		gchar *
353		tracker_util_build_dbus_uri (GBusType bus_type,
354		const gchar *service,
355		const gchar *path)
356	0	{
357	0	GString *str;
358
359	0	if (!g_dbus_is_name (service))
360	0	return NULL;
361	0	if (path && path[0] != '/')
362	0	return NULL;
363
364	0	if (bus_type == G_BUS_TYPE_SESSION)
365	0	str = g_string_new ("dbus:");
366	0	else if (bus_type == G_BUS_TYPE_SYSTEM)
367	0	str = g_string_new ("dbus:system:");
368	0	else
369	0	return NULL;
370
371	0	g_string_append (str, service);
372
373	0	if (path) {
374	0	g_string_append_c (str, ':');
375	0	g_string_append (str, path);
376	0	}
377
378	0	return g_string_free (str, FALSE);
379	0	}