/src/tinysparql/src/libtinysparql/tracker-serializer-turtle.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (C) 2021, Red Hat, Inc |
3 | | * |
4 | | * This library is free software; you can redistribute it and/or |
5 | | * modify it under the terms of the GNU Lesser General Public |
6 | | * License as published by the Free Software Foundation; either |
7 | | * version 2.1 of the License, or (at your option) any later version. |
8 | | * |
9 | | * This library is distributed in the hope that it will be useful, |
10 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | | * Lesser General Public License for more details. |
13 | | * |
14 | | * You should have received a copy of the GNU Lesser General Public |
15 | | * License along with this library; if not, write to the |
16 | | * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
17 | | * Boston, MA 02110-1301, USA. |
18 | | * |
19 | | * Author: Carlos Garnacho <carlosg@gnome.org> |
20 | | */ |
21 | | |
22 | | /* Serialization of cursors to the turtle format defined at: |
23 | | * https://www.w3.org/TR/turtle/ |
24 | | */ |
25 | | |
26 | | #include "config.h" |
27 | | |
28 | | #include "tracker-serializer-turtle.h" |
29 | | |
30 | | typedef struct _TrackerTriple TrackerTriple; |
31 | | |
32 | | struct _TrackerTriple |
33 | | { |
34 | | gchar *subject; |
35 | | gchar *predicate; |
36 | | gchar *object; |
37 | | gchar *object_langtag; |
38 | | TrackerSparqlValueType subject_type; |
39 | | TrackerSparqlValueType object_type; |
40 | | }; |
41 | | |
42 | | struct _TrackerSerializerTurtle |
43 | | { |
44 | | TrackerSerializer parent_instance; |
45 | | TrackerTriple last_triple; |
46 | | GString *data; |
47 | | guint stream_closed : 1; |
48 | | guint cursor_started : 1; |
49 | | guint cursor_finished : 1; |
50 | | guint head_printed : 1; |
51 | | guint has_triples : 1; |
52 | | }; |
53 | | |
54 | | G_DEFINE_TYPE (TrackerSerializerTurtle, tracker_serializer_turtle, |
55 | | TRACKER_TYPE_SERIALIZER) |
56 | | |
/* First component in which a triple differs from the previous one.
 *
 * The numeric order matters: the serialization loop uses "<="
 * comparisons against these values to decide which components of
 * the statement have to be printed.
 */
typedef enum
{
	/* There is no previous triple */
	TRACKER_TRIPLE_BREAK_NONE = 0,
	/* The subject changed */
	TRACKER_TRIPLE_BREAK_SUBJECT = 1,
	/* Same subject, the predicate changed */
	TRACKER_TRIPLE_BREAK_PREDICATE = 2,
	/* Same subject and predicate */
	TRACKER_TRIPLE_BREAK_OBJECT = 3,
} TrackerTripleBreak;
64 | | |
65 | | static void |
66 | | tracker_triple_init_from_cursor (TrackerTriple *triple, |
67 | | TrackerSparqlCursor *cursor) |
68 | 0 | { |
69 | 0 | const gchar *langtag; |
70 | |
|
71 | 0 | triple->subject_type = tracker_sparql_cursor_get_value_type (cursor, 0); |
72 | 0 | triple->object_type = tracker_sparql_cursor_get_value_type (cursor, 2); |
73 | 0 | triple->subject = g_strdup (tracker_sparql_cursor_get_string (cursor, 0, NULL)); |
74 | 0 | triple->predicate = g_strdup (tracker_sparql_cursor_get_string (cursor, 1, NULL)); |
75 | 0 | triple->object = g_strdup (tracker_sparql_cursor_get_langstring (cursor, 2, &langtag, NULL)); |
76 | 0 | if (triple->object) |
77 | 0 | triple->object_langtag = g_strdup (langtag); |
78 | |
|
79 | 0 | if (triple->subject_type == TRACKER_SPARQL_VALUE_TYPE_STRING) { |
80 | 0 | if (g_str_has_prefix (triple->subject, "urn:bnode:")) { |
81 | 0 | triple->subject_type = TRACKER_SPARQL_VALUE_TYPE_BLANK_NODE; |
82 | 0 | } else { |
83 | 0 | triple->subject_type = TRACKER_SPARQL_VALUE_TYPE_URI; |
84 | 0 | } |
85 | 0 | } |
86 | |
|
87 | 0 | if (triple->object_type == TRACKER_SPARQL_VALUE_TYPE_STRING) { |
88 | 0 | if (g_str_has_prefix (triple->object, "urn:bnode:")) { |
89 | 0 | triple->object_type = TRACKER_SPARQL_VALUE_TYPE_BLANK_NODE; |
90 | 0 | } |
91 | 0 | } |
92 | 0 | } |
93 | | |
94 | | static void |
95 | | tracker_triple_clear (TrackerTriple *triple) |
96 | 0 | { |
97 | 0 | g_clear_pointer (&triple->subject, g_free); |
98 | 0 | g_clear_pointer (&triple->predicate, g_free); |
99 | 0 | g_clear_pointer (&triple->object, g_free); |
100 | 0 | g_clear_pointer (&triple->object_langtag, g_free); |
101 | 0 | } |
102 | | |
103 | | static TrackerTripleBreak |
104 | | tracker_triple_get_break (TrackerTriple *last, |
105 | | TrackerTriple *cur) |
106 | 0 | { |
107 | 0 | if (!last->subject) |
108 | 0 | return TRACKER_TRIPLE_BREAK_NONE; |
109 | | |
110 | 0 | if (g_strcmp0 (last->subject, cur->subject) != 0) |
111 | 0 | return TRACKER_TRIPLE_BREAK_SUBJECT; |
112 | | |
113 | 0 | if (g_strcmp0 (last->predicate, cur->predicate) != 0) |
114 | 0 | return TRACKER_TRIPLE_BREAK_PREDICATE; |
115 | | |
116 | 0 | return TRACKER_TRIPLE_BREAK_OBJECT; |
117 | 0 | } |
118 | | |
119 | | static void |
120 | | tracker_serializer_turtle_finalize (GObject *object) |
121 | 0 | { |
122 | 0 | g_input_stream_close (G_INPUT_STREAM (object), NULL, NULL); |
123 | |
|
124 | 0 | G_OBJECT_CLASS (tracker_serializer_turtle_parent_class)->finalize (object); |
125 | 0 | } |
126 | | |
127 | | static void |
128 | | print_value (GString *str, |
129 | | const gchar *value, |
130 | | TrackerSparqlValueType value_type, |
131 | | TrackerNamespaceManager *namespaces) |
132 | 0 | { |
133 | 0 | switch (value_type) { |
134 | 0 | case TRACKER_SPARQL_VALUE_TYPE_URI: { |
135 | 0 | gchar *shortname; |
136 | |
|
137 | 0 | shortname = tracker_namespace_manager_compress_uri (namespaces, value); |
138 | |
|
139 | 0 | if (shortname) { |
140 | 0 | g_string_append (str, shortname); |
141 | 0 | } else { |
142 | 0 | g_string_append_c (str, '<'); |
143 | 0 | g_string_append (str, value); |
144 | 0 | g_string_append_c (str, '>'); |
145 | 0 | } |
146 | |
|
147 | 0 | g_free (shortname); |
148 | 0 | break; |
149 | 0 | } |
150 | 0 | case TRACKER_SPARQL_VALUE_TYPE_BLANK_NODE: { |
151 | 0 | gchar *bnode_label; |
152 | |
|
153 | 0 | if (g_str_has_prefix (value, "_:")) { |
154 | 0 | g_string_append (str, value); |
155 | 0 | } else { |
156 | 0 | bnode_label = g_strdelimit (g_strdup (value), ":", '_'); |
157 | 0 | g_string_append (str, "_:"); |
158 | 0 | g_string_append (str, bnode_label); |
159 | 0 | g_free (bnode_label); |
160 | 0 | } |
161 | 0 | break; |
162 | 0 | } |
163 | 0 | case TRACKER_SPARQL_VALUE_TYPE_STRING: |
164 | 0 | case TRACKER_SPARQL_VALUE_TYPE_DATETIME: { |
165 | 0 | gchar *escaped; |
166 | |
|
167 | 0 | escaped = tracker_sparql_escape_string (value); |
168 | 0 | g_string_append_c (str, '"'); |
169 | 0 | g_string_append (str, escaped); |
170 | 0 | g_string_append_c (str, '"'); |
171 | 0 | g_free (escaped); |
172 | 0 | break; |
173 | 0 | } |
174 | 0 | case TRACKER_SPARQL_VALUE_TYPE_INTEGER: |
175 | 0 | case TRACKER_SPARQL_VALUE_TYPE_DOUBLE: |
176 | 0 | g_string_append (str, value); |
177 | 0 | break; |
178 | 0 | case TRACKER_SPARQL_VALUE_TYPE_BOOLEAN: |
179 | 0 | g_string_append (str, |
180 | 0 | (value[0] == 't' || value[0] == 'T') ? |
181 | 0 | "true" : "false"); |
182 | 0 | break; |
183 | 0 | default: |
184 | 0 | g_assert_not_reached (); |
185 | 0 | } |
186 | 0 | } |
187 | | |
188 | | static gboolean |
189 | | serialize_up_to_size (TrackerSerializerTurtle *serializer_ttl, |
190 | | gsize size, |
191 | | GCancellable *cancellable, |
192 | | GError **error) |
193 | 0 | { |
194 | 0 | TrackerSparqlCursor *cursor; |
195 | 0 | TrackerNamespaceManager *namespaces; |
196 | 0 | GError *inner_error = NULL; |
197 | 0 | TrackerTriple cur; |
198 | |
|
199 | 0 | if (!serializer_ttl->data) |
200 | 0 | serializer_ttl->data = g_string_new (NULL); |
201 | |
|
202 | 0 | cursor = tracker_serializer_get_cursor (TRACKER_SERIALIZER (serializer_ttl)); |
203 | 0 | namespaces = tracker_serializer_get_namespaces (TRACKER_SERIALIZER (serializer_ttl)); |
204 | |
|
205 | 0 | if (!serializer_ttl->head_printed) { |
206 | 0 | gchar *str; |
207 | |
|
208 | 0 | str = tracker_namespace_manager_print_turtle (namespaces); |
209 | |
|
210 | 0 | g_string_append (serializer_ttl->data, str); |
211 | 0 | g_string_append_c (serializer_ttl->data, '\n'); |
212 | 0 | g_free (str); |
213 | 0 | serializer_ttl->head_printed = TRUE; |
214 | 0 | } |
215 | |
|
216 | 0 | while (!serializer_ttl->cursor_finished && |
217 | 0 | serializer_ttl->data->len < size) { |
218 | 0 | TrackerTripleBreak br; |
219 | |
|
220 | 0 | if (!tracker_sparql_cursor_next (cursor, cancellable, &inner_error)) { |
221 | 0 | if (inner_error) { |
222 | 0 | g_propagate_error (error, inner_error); |
223 | 0 | return FALSE; |
224 | 0 | } else { |
225 | 0 | serializer_ttl->cursor_finished = TRUE; |
226 | 0 | break; |
227 | 0 | } |
228 | 0 | } else { |
229 | 0 | serializer_ttl->cursor_started = TRUE; |
230 | 0 | } |
231 | | |
232 | 0 | tracker_triple_init_from_cursor (&cur, cursor); |
233 | |
|
234 | 0 | if (!cur.subject || !cur.predicate || !cur.object) { |
235 | 0 | g_set_error (error, |
236 | 0 | TRACKER_SPARQL_ERROR, |
237 | 0 | TRACKER_SPARQL_ERROR_INTERNAL, |
238 | 0 | "Cursor has no subject/predicate/object columns"); |
239 | 0 | tracker_triple_clear (&cur); |
240 | 0 | return FALSE; |
241 | 0 | } |
242 | | |
243 | 0 | br = tracker_triple_get_break (&serializer_ttl->last_triple, &cur); |
244 | |
|
245 | 0 | if (br <= TRACKER_TRIPLE_BREAK_SUBJECT) { |
246 | 0 | if (br == TRACKER_TRIPLE_BREAK_SUBJECT) |
247 | 0 | g_string_append (serializer_ttl->data, " .\n\n"); |
248 | 0 | print_value (serializer_ttl->data, cur.subject, cur.subject_type, namespaces); |
249 | 0 | } |
250 | |
|
251 | 0 | if (br <= TRACKER_TRIPLE_BREAK_PREDICATE) { |
252 | 0 | if (br == TRACKER_TRIPLE_BREAK_PREDICATE) |
253 | 0 | g_string_append (serializer_ttl->data, " ;\n "); |
254 | 0 | else |
255 | 0 | g_string_append_c (serializer_ttl->data, ' '); |
256 | |
|
257 | 0 | print_value (serializer_ttl->data, cur.predicate, |
258 | 0 | TRACKER_SPARQL_VALUE_TYPE_URI, namespaces); |
259 | 0 | } |
260 | |
|
261 | 0 | if (br <= TRACKER_TRIPLE_BREAK_OBJECT) { |
262 | 0 | if (br == TRACKER_TRIPLE_BREAK_OBJECT) |
263 | 0 | g_string_append (serializer_ttl->data, ","); |
264 | |
|
265 | 0 | g_string_append_c (serializer_ttl->data, ' '); |
266 | 0 | print_value (serializer_ttl->data, cur.object, cur.object_type, namespaces); |
267 | |
|
268 | 0 | if (cur.object_langtag) { |
269 | 0 | g_string_append_c (serializer_ttl->data, '@'); |
270 | 0 | g_string_append (serializer_ttl->data, cur.object_langtag); |
271 | 0 | } |
272 | 0 | } |
273 | |
|
274 | 0 | serializer_ttl->has_triples = TRUE; |
275 | 0 | tracker_triple_clear (&serializer_ttl->last_triple); |
276 | 0 | memcpy (&serializer_ttl->last_triple, &cur, sizeof (TrackerTriple)); |
277 | 0 | } |
278 | | |
279 | | /* Print dot for the last triple */ |
280 | 0 | if (serializer_ttl->cursor_finished && |
281 | 0 | serializer_ttl->has_triples) |
282 | 0 | g_string_append (serializer_ttl->data, " .\n"); |
283 | |
|
284 | 0 | return TRUE; |
285 | 0 | } |
286 | | |
287 | | static gssize |
288 | | tracker_serializer_turtle_read (GInputStream *istream, |
289 | | gpointer buffer, |
290 | | gsize count, |
291 | | GCancellable *cancellable, |
292 | | GError **error) |
293 | 0 | { |
294 | 0 | TrackerSerializerTurtle *serializer_ttl = TRACKER_SERIALIZER_TURTLE (istream); |
295 | 0 | gsize bytes_copied; |
296 | |
|
297 | 0 | if (serializer_ttl->stream_closed || |
298 | 0 | (serializer_ttl->cursor_finished && |
299 | 0 | serializer_ttl->data->len == 0)) |
300 | 0 | return 0; |
301 | | |
302 | 0 | if (!serialize_up_to_size (serializer_ttl, |
303 | 0 | count, |
304 | 0 | cancellable, |
305 | 0 | error)) |
306 | 0 | return -1; |
307 | | |
308 | 0 | bytes_copied = MIN (count, serializer_ttl->data->len); |
309 | |
|
310 | 0 | memcpy (buffer, |
311 | 0 | serializer_ttl->data->str, |
312 | 0 | bytes_copied); |
313 | 0 | g_string_erase (serializer_ttl->data, 0, bytes_copied); |
314 | |
|
315 | 0 | return bytes_copied; |
316 | 0 | } |
317 | | |
318 | | static gboolean |
319 | | tracker_serializer_turtle_close (GInputStream *istream, |
320 | | GCancellable *cancellable, |
321 | | GError **error) |
322 | 0 | { |
323 | 0 | TrackerSerializerTurtle *serializer_ttl = TRACKER_SERIALIZER_TURTLE (istream); |
324 | |
|
325 | 0 | tracker_triple_clear (&serializer_ttl->last_triple); |
326 | |
|
327 | 0 | if (serializer_ttl->data) { |
328 | 0 | g_string_free (serializer_ttl->data, TRUE); |
329 | 0 | serializer_ttl->data = NULL; |
330 | 0 | } |
331 | |
|
332 | 0 | return TRUE; |
333 | 0 | } |
334 | | |
335 | | static void |
336 | | tracker_serializer_turtle_class_init (TrackerSerializerTurtleClass *klass) |
337 | 0 | { |
338 | 0 | GObjectClass *object_class = G_OBJECT_CLASS (klass); |
339 | 0 | GInputStreamClass *istream_class = G_INPUT_STREAM_CLASS (klass); |
340 | |
|
341 | 0 | object_class->finalize = tracker_serializer_turtle_finalize; |
342 | |
|
343 | 0 | istream_class->read_fn = tracker_serializer_turtle_read; |
344 | 0 | istream_class->close_fn = tracker_serializer_turtle_close; |
345 | 0 | } |
346 | | |
347 | | static void |
348 | | tracker_serializer_turtle_init (TrackerSerializerTurtle *serializer) |
349 | 0 | { |
350 | 0 | } |