/src/tinysparql/src/common/tracker-utils.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (C) 2006, Jamie McCracken <jamiemcc@gnome.org> |
3 | | * Copyright (C) 2008, Nokia <ivan.frade@nokia.com> |
4 | | * |
5 | | * This library is free software; you can redistribute it and/or |
6 | | * modify it under the terms of the GNU Lesser General Public |
7 | | * License as published by the Free Software Foundation; either |
8 | | * version 2.1 of the License, or (at your option) any later version. |
9 | | * |
10 | | * This library is distributed in the hope that it will be useful, |
11 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | | * Lesser General Public License for more details. |
14 | | * |
15 | | * You should have received a copy of the GNU Lesser General Public |
16 | | * License along with this library; if not, write to the |
17 | | * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
18 | | * Boston, MA 02110-1301, USA. |
19 | | */ |
20 | | |
21 | | #include "config.h" |
22 | | |
23 | | #include <stdio.h> |
24 | | #include <string.h> |
25 | | #include <locale.h> |
26 | | |
27 | | #include <glib.h> |
28 | | #include <glib/gi18n.h> |
29 | | |
30 | | #include "tracker-utils.h" |
31 | | |
32 | | /** |
33 | | * tracker_strhex: |
34 | | * @data: The input array of bytes |
35 | | * @size: Number of bytes in the input array |
36 | | * @delimiter: Character to use as separator between each printed byte |
37 | | * |
38 | | * Returns the contents of @data as a printable string in hexadecimal |
39 | | * representation. |
40 | | * |
41 | | * Based on GNU PDF's pdf_text_test_get_hex() |
42 | | * |
43 | | * Returns: A newly allocated string which should be disposed with g_free() |
44 | | **/ |
45 | | gchar * |
46 | | tracker_strhex (const guint8 *data, |
47 | | gsize size, |
48 | | gchar delimiter) |
49 | 0 | { |
50 | 0 | gsize i; |
51 | 0 | gsize j; |
52 | 0 | gsize new_str_length; |
53 | 0 | gchar *new_str; |
54 | | |
55 | | /* Get new string length. If input string has N bytes, we need: |
56 | | * - 1 byte for last NUL char |
57 | | * - 2N bytes for hexadecimal char representation of each byte... |
58 | | * - N-1 bytes for the separator ':' |
59 | | * So... a total of (1+2N+N-1) = 3N bytes are needed... */ |
60 | 0 | new_str_length = 3 * size; |
61 | | |
62 | | /* Allocate memory for new array and initialize contents to NUL */ |
63 | 0 | new_str = g_malloc0 (new_str_length); |
64 | | |
65 | | /* Print hexadecimal representation of each byte... */ |
66 | 0 | for(i=0, j=0; i<size; i++, j+=3) { |
67 | | /* Print character in output string... */ |
68 | 0 | snprintf (&new_str[j], 3, "%02X", data[i]); |
69 | | |
70 | | /* And if needed, add separator */ |
71 | 0 | if(i != (size-1) ) { |
72 | 0 | new_str[j+2] = delimiter; |
73 | 0 | } |
74 | 0 | } |
75 | | |
76 | | /* Set output string */ |
77 | 0 | return new_str; |
78 | 0 | } |
79 | | |
80 | | /** |
81 | | * tracker_utf8_truncate: |
82 | | * @str: Nul-terminated input string |
83 | | * @max_size: Maximum length of the output string |
84 | | * |
85 | | * Returns up to @max_size characters long substring of @str, followed |
86 | | * with "[…]" when actually truncated. |
87 | | * |
88 | | * Returns: A newly allocated string which should be disposed with g_free() |
89 | | */ |
90 | | gchar * |
91 | | tracker_utf8_truncate (const gchar *str, |
92 | | gsize max_size) |
93 | 0 | { |
94 | 0 | gchar *retv = NULL; |
95 | |
|
96 | 0 | if (!g_utf8_validate (str, -1, NULL)) { |
97 | 0 | retv = g_strdup ("[Invalid UTF-8]"); |
98 | 0 | } else if ((gsize) g_utf8_strlen (str, -1) > max_size) { |
99 | 0 | gchar *substring = g_utf8_substring (str, 0, max_size - 3); |
100 | 0 | retv = g_strdup_printf ("%s[…]", substring); |
101 | 0 | g_free (substring); |
102 | 0 | } else { |
103 | 0 | retv = g_strdup (str); |
104 | 0 | } |
105 | |
|
106 | 0 | return retv; |
107 | 0 | } |
108 | | |
109 | | static gboolean |
110 | | range_is_xdigit (const gchar *str, |
111 | | gssize start, |
112 | | gssize end) |
113 | 0 | { |
114 | 0 | gssize i; |
115 | |
|
116 | 0 | g_assert (end > start); |
117 | | |
118 | 0 | for (i = start; i < end; i++) { |
119 | 0 | if (!g_ascii_isxdigit (str[i])) |
120 | 0 | return FALSE; |
121 | 0 | } |
122 | | |
123 | 0 | return TRUE; |
124 | 0 | } |
125 | | |
126 | | static gunichar |
127 | | xdigit_to_unichar (const gchar *str, |
128 | | gssize start, |
129 | | gssize end) |
130 | 0 | { |
131 | 0 | gunichar ch = 0; |
132 | 0 | gssize i; |
133 | |
|
134 | 0 | g_assert (end > start); |
135 | | |
136 | 0 | for (i = start; i < end; i++) { |
137 | 0 | ch |= g_ascii_xdigit_value (str[i]); |
138 | 0 | if (i < end - 1) |
139 | 0 | ch <<= 4; |
140 | 0 | } |
141 | |
|
142 | 0 | return ch; |
143 | 0 | } |
144 | | |
145 | | /* |
146 | | * tracker_unescape_unichars: |
147 | | * @str: Input string |
148 | | * @len: Length |
149 | | * |
150 | | * Unescapes \u and \U sequences into their respective unichars. |
151 | | * |
152 | | * Returns: a string with no \u nor \U sequences |
153 | | */ |
154 | | gchar * |
155 | | tracker_unescape_unichars (const gchar *str, |
156 | | gssize len) |
157 | 0 | { |
158 | 0 | GString *copy; |
159 | 0 | gunichar ch; |
160 | 0 | gssize i = 0; |
161 | |
|
162 | 0 | if (len < 0) |
163 | 0 | len = strlen (str); |
164 | |
|
165 | 0 | copy = g_string_new (NULL); |
166 | |
|
167 | 0 | while (i < len) { |
168 | 0 | if (len - i >= 2 && |
169 | 0 | str[i] == '\\' && |
170 | 0 | g_ascii_tolower (str[i + 1]) != 'u') { |
171 | | /* Not an unicode escape sequence */ |
172 | 0 | g_string_append_c (copy, str[i]); |
173 | 0 | g_string_append_c (copy, str[i + 1]); |
174 | 0 | i += 2; |
175 | 0 | } else if (len - i >= 6 && |
176 | 0 | strncmp (&str[i], "\\u", 2) == 0 && |
177 | 0 | range_is_xdigit (&str[i], 2, 6)) { |
178 | 0 | ch = xdigit_to_unichar (&str[i], 2, 6); |
179 | 0 | g_string_append_unichar (copy, ch); |
180 | 0 | i += 6; |
181 | 0 | } else if (len - i >= 10 && |
182 | 0 | strncmp (&str[i], "\\U", 2) == 0 && |
183 | 0 | range_is_xdigit (&str[i], 2, 10)) { |
184 | 0 | ch = xdigit_to_unichar (&str[i], 2, 10); |
185 | 0 | g_string_append_unichar (copy, ch); |
186 | 0 | i += 10; |
187 | 0 | } else { |
188 | 0 | g_string_append_c (copy, str[i]); |
189 | 0 | i++; |
190 | 0 | } |
191 | 0 | } |
192 | |
|
193 | 0 | return g_string_free (copy, FALSE); |
194 | 0 | } |
195 | | |
196 | | gboolean |
197 | | parse_abs_uri (const gchar *uri, |
198 | | gchar **base, |
199 | | const gchar **rel_path) |
200 | 0 | { |
201 | 0 | const gchar *loc, *end; |
202 | |
|
203 | 0 | end = &uri[strlen (uri)]; |
204 | 0 | loc = uri; |
205 | |
|
206 | 0 | if (!g_ascii_isalpha (loc[0])) |
207 | 0 | return FALSE; |
208 | | |
209 | 0 | while (loc != end) { |
210 | 0 | if (loc[0] == ':') |
211 | 0 | break; |
212 | 0 | if (!g_ascii_isalpha (loc[0]) && |
213 | 0 | loc[0] != '+' && loc[0] != '-' && loc[0] != '.') |
214 | 0 | return FALSE; |
215 | 0 | loc++; |
216 | 0 | } |
217 | | |
218 | 0 | if (loc == uri) |
219 | 0 | return FALSE; |
220 | | |
221 | 0 | if (strncmp (loc, "://", 3) == 0) { |
222 | | /* Include authority in base */ |
223 | 0 | loc += 3; |
224 | 0 | loc = strchr (loc, '/'); |
225 | 0 | if (!loc) |
226 | 0 | loc = end; |
227 | 0 | } |
228 | |
|
229 | 0 | *base = g_strndup (uri, loc - uri); |
230 | 0 | *rel_path = loc + 1; |
231 | |
|
232 | 0 | return TRUE; |
233 | 0 | } |
234 | | |
235 | | GPtrArray * |
236 | | remove_dot_segments (gchar **uri_elems) |
237 | 0 | { |
238 | 0 | GPtrArray *array; |
239 | 0 | gint i; |
240 | |
|
241 | 0 | array = g_ptr_array_new (); |
242 | |
|
243 | 0 | for (i = 0; uri_elems[i] != NULL; i++) { |
244 | 0 | if (g_strcmp0 (uri_elems[i], ".") == 0) { |
245 | 0 | continue; |
246 | 0 | } else if (g_strcmp0 (uri_elems[i], "..") == 0) { |
247 | 0 | if (array->len > 0) |
248 | 0 | g_ptr_array_remove_index (array, array->len - 1); |
249 | 0 | continue; |
250 | 0 | } else if (*uri_elems[i] != '\0') { |
251 | | /* NB: Not a copy */ |
252 | 0 | g_ptr_array_add (array, uri_elems[i]); |
253 | 0 | } |
254 | 0 | } |
255 | |
|
256 | 0 | return array; |
257 | 0 | } |
258 | | |
259 | | gchar * |
260 | | tracker_resolve_relative_uri (const gchar *base, |
261 | | const gchar *rel_uri) |
262 | 0 | { |
263 | 0 | gchar **base_split, **rel_split, *host; |
264 | 0 | GPtrArray *base_norm, *rel_norm; |
265 | 0 | GString *str; |
266 | 0 | guint i; |
267 | | |
268 | | /* Relative IRIs are combined with base IRIs with a simplified version |
269 | | * of the algorithm described at RFC3986, Section 5.2. We don't care |
270 | | * about query and fragment parts of an URI, and some simplifications |
271 | | * are taken on base uri parsing and relative uri validation. |
272 | | */ |
273 | 0 | rel_split = g_strsplit (rel_uri, "/", -1); |
274 | | |
275 | | /* Rel uri is a full uri? */ |
276 | 0 | if (strchr (rel_split[0], ':')) { |
277 | 0 | g_strfreev (rel_split); |
278 | 0 | return g_strdup (rel_uri); |
279 | 0 | } |
280 | | |
281 | 0 | if (!parse_abs_uri (base, &host, &base)) { |
282 | 0 | g_strfreev (rel_split); |
283 | 0 | return g_strdup (rel_uri); |
284 | 0 | } |
285 | | |
286 | 0 | base_split = g_strsplit (base, "/", -1); |
287 | |
|
288 | 0 | base_norm = remove_dot_segments (base_split); |
289 | 0 | rel_norm = remove_dot_segments (rel_split); |
290 | |
|
291 | 0 | for (i = 0; i < rel_norm->len; i++) { |
292 | 0 | g_ptr_array_add (base_norm, |
293 | 0 | g_ptr_array_index (rel_norm, i)); |
294 | 0 | } |
295 | |
|
296 | 0 | str = g_string_new (host); |
297 | 0 | for (i = 0; i < base_norm->len; i++) { |
298 | 0 | g_string_append_c (str, '/'); |
299 | 0 | g_string_append (str, |
300 | 0 | g_ptr_array_index (base_norm, i)); |
301 | 0 | } |
302 | |
|
303 | 0 | g_ptr_array_unref (base_norm); |
304 | 0 | g_ptr_array_unref (rel_norm); |
305 | 0 | g_strfreev (base_split); |
306 | 0 | g_strfreev (rel_split); |
307 | 0 | g_free (host); |
308 | |
|
309 | 0 | return g_string_free (str, FALSE); |
310 | 0 | } |
311 | | |
312 | | gboolean |
313 | | tracker_util_parse_dbus_uri (const gchar *uri, |
314 | | GBusType *bus_type, |
315 | | gchar **service, |
316 | | gchar **path) |
317 | 0 | { |
318 | 0 | const gchar *separator; |
319 | |
|
320 | 0 | g_assert (uri != NULL); |
321 | | |
322 | 0 | if (!g_str_has_prefix (uri, "dbus:")) |
323 | 0 | return FALSE; |
324 | | |
325 | 0 | uri += strlen ("dbus:"); |
326 | |
|
327 | 0 | if (g_str_has_prefix (uri, "system:")) { |
328 | 0 | *bus_type = G_BUS_TYPE_SYSTEM; |
329 | 0 | uri += strlen ("system:"); |
330 | 0 | } else if (g_str_has_prefix (uri, "session:")) { |
331 | 0 | *bus_type = G_BUS_TYPE_SESSION; |
332 | 0 | uri += strlen ("session:"); |
333 | 0 | } else { |
334 | | /* Fall back to session bus by default */ |
335 | 0 | *bus_type = G_BUS_TYPE_SESSION; |
336 | 0 | } |
337 | |
|
338 | 0 | separator = strstr (uri, ":/"); |
339 | |
|
340 | 0 | if (separator) { |
341 | 0 | *service = g_strndup (uri, separator - uri); |
342 | 0 | separator += 1; |
343 | 0 | *path = g_strdup (separator); |
344 | 0 | } else { |
345 | 0 | *service = g_strdup (uri); |
346 | 0 | *path = NULL; |
347 | 0 | } |
348 | |
|
349 | 0 | return TRUE; |
350 | 0 | } |
351 | | |
352 | | gchar * |
353 | | tracker_util_build_dbus_uri (GBusType bus_type, |
354 | | const gchar *service, |
355 | | const gchar *path) |
356 | 0 | { |
357 | 0 | GString *str; |
358 | |
|
359 | 0 | if (!g_dbus_is_name (service)) |
360 | 0 | return NULL; |
361 | 0 | if (path && path[0] != '/') |
362 | 0 | return NULL; |
363 | | |
364 | 0 | if (bus_type == G_BUS_TYPE_SESSION) |
365 | 0 | str = g_string_new ("dbus:"); |
366 | 0 | else if (bus_type == G_BUS_TYPE_SYSTEM) |
367 | 0 | str = g_string_new ("dbus:system:"); |
368 | 0 | else |
369 | 0 | return NULL; |
370 | | |
371 | 0 | g_string_append (str, service); |
372 | |
|
373 | 0 | if (path) { |
374 | 0 | g_string_append_c (str, ':'); |
375 | 0 | g_string_append (str, path); |
376 | 0 | } |
377 | |
|
378 | 0 | return g_string_free (str, FALSE); |
379 | 0 | } |