/src/tinysparql/src/libtinysparql/tracker-deserializer-json-ld.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (C) 2022, Red Hat Inc. |
3 | | * |
4 | | * This library is free software; you can redistribute it and/or |
5 | | * modify it under the terms of the GNU Lesser General Public |
6 | | * License as published by the Free Software Foundation; either |
7 | | * version 2.1 of the License, or (at your option) any later version. |
8 | | * |
9 | | * This library is distributed in the hope that it will be useful, |
10 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | | * Lesser General Public License for more details. |
13 | | * |
14 | | * You should have received a copy of the GNU Lesser General Public |
15 | | * License along with this library; if not, write to the |
16 | | * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
17 | | * Boston, MA 02110-1301, USA. |
18 | | * |
19 | | * Author: Carlos Garnacho <carlosg@gnome.org> |
20 | | */ |
21 | | |
22 | | /* Deserialization to cursors for the JSON format defined at: |
23 | | * https://www.w3.org/TR/json-ld/ |
24 | | */ |
25 | | |
26 | | #include "config.h" |
27 | | |
28 | | #include "tracker-deserializer-json-ld.h" |
29 | | |
30 | | #include <json-glib/json-glib.h> |
31 | | |
/* States of the JSON-LD walking state machine (see forward_state()). */
enum {
	STATE_INITIAL         = 0, /* Nothing consumed yet */
	STATE_ROOT_LIST       = 1, /* Iterating a top-level array */
	STATE_MAYBE_GRAPH     = 2, /* Inside an object that may hold "@graph" */
	STATE_OBJECT_LIST     = 3, /* Iterating the array under "@graph" */
	STATE_PROPERTIES      = 4, /* Iterating the members of an object */
	STATE_VALUE_LIST      = 5, /* Iterating an array of property values */
	STATE_VALUE           = 6, /* Positioned on a plain value */
	STATE_VALUE_AS_OBJECT = 7, /* Positioned on an object with "@value" */
	STATE_FINAL           = 8, /* State stack exhausted */
};
43 | | |
/* Kind of JSON container tracked by a StateStack entry. */
enum {
	STACK_ARRAY  = 0,
	STACK_OBJECT = 1,
};
48 | | |
/* One entry of the container stack: a JSON array or object the reader
 * has descended into, plus the iteration position inside it.
 */
typedef struct {
	guint type;  /* STACK_ARRAY or STACK_OBJECT */
	guint state; /* Parser state given to push_stack(); restored by pop_stack() */
	union {
		struct {
			gint idx;       /* Element being read, -1 before the first one */
			guint elements; /* Element count taken when the array was pushed */
		} array;
		struct {
			gint idx;          /* Index into members, -1 before the first one */
			gchar **members;   /* NULL-terminated member names, freed with g_strfreev() */
			gchar *id;         /* Expanded "@id" string of the object, or NULL */
			gboolean is_graph; /* Whether the object has a "@graph" member */
		} object;
	} data;
} StateStack;
65 | | |
/* Instance data of the JSON-LD deserializer cursor. */
struct _TrackerDeserializerJsonLD {
	TrackerDeserializer parent_instance;
	JsonParser *parser;       /* Created in init(), loaded in constructed() */
	JsonReader *reader;       /* Created over the parsed root; NULL if loading failed */
	GArray *state_stack;      /* Array of StateStack, innermost container last */
	gchar *default_lang;      /* "@language" value taken from a "@context", or NULL */
	gchar *cur_graph;         /* Graph column of the current row */
	gchar *cur_subject;       /* Subject column of the current row */
	gchar *cur_predicate;     /* Predicate column of the current row */
	gchar *cur_object;        /* Object column of the current row */
	gchar *cur_object_lang;   /* Language tag of the current object, or NULL */
	TrackerSparqlValueType object_type; /* Value type reported for cur_object */
	guint state;              /* Current STATE_* value */
	gboolean has_row;         /* Set by forward_state() when a row is ready */
	guint blank_node_idx;     /* Counter used to name generated blank nodes */
	GError *init_error;       /* Load error from constructed(), reported by next() */
};
83 | | |
/* Registers TrackerDeserializerJsonLD as a subtype of
 * TRACKER_TYPE_DESERIALIZER_RDF.
 */
G_DEFINE_TYPE (TrackerDeserializerJsonLD,
               tracker_deserializer_json_ld,
               TRACKER_TYPE_DESERIALIZER_RDF)
87 | 25.5k | |
88 | 25.5k | static void |
89 | 25.5k | tracker_deserializer_json_ld_finalize (GObject *object) |
90 | 25.5k | { |
91 | 8.51k | TrackerDeserializerJsonLD *deserializer = |
92 | 8.51k | TRACKER_DESERIALIZER_JSON_LD (object); |
93 | | |
94 | 8.51k | tracker_sparql_cursor_close (TRACKER_SPARQL_CURSOR (deserializer)); |
95 | | |
96 | 8.51k | g_clear_object (&deserializer->reader); |
97 | 8.51k | g_clear_object (&deserializer->parser); |
98 | 8.51k | g_array_unref (deserializer->state_stack); |
99 | 8.51k | g_clear_pointer (&deserializer->default_lang, g_free); |
100 | 8.51k | g_clear_pointer (&deserializer->cur_graph, g_free); |
101 | 8.51k | g_clear_pointer (&deserializer->cur_subject, g_free); |
102 | 8.51k | g_clear_pointer (&deserializer->cur_predicate, g_free); |
103 | 8.51k | g_clear_pointer (&deserializer->cur_object, g_free); |
104 | 8.51k | g_clear_pointer (&deserializer->cur_object_lang, g_free); |
105 | | |
106 | 8.51k | G_OBJECT_CLASS (tracker_deserializer_json_ld_parent_class)->finalize (object); |
107 | 8.51k | } |
108 | | |
109 | | static void |
110 | | tracker_deserializer_json_ld_constructed (GObject *object) |
111 | 8.51k | { |
112 | 8.51k | TrackerDeserializerJsonLD *deserializer = |
113 | 8.51k | TRACKER_DESERIALIZER_JSON_LD (object); |
114 | 8.51k | GInputStream *stream; |
115 | | |
116 | 8.51k | G_OBJECT_CLASS (tracker_deserializer_json_ld_parent_class)->constructed (object); |
117 | | |
118 | 8.51k | stream = tracker_deserializer_get_stream (TRACKER_DESERIALIZER (object)); |
119 | | |
120 | 8.51k | if (json_parser_load_from_stream (deserializer->parser, |
121 | 8.51k | stream, |
122 | 8.51k | NULL, |
123 | 8.51k | &deserializer->init_error)) { |
124 | 7.24k | JsonNode *root; |
125 | | |
126 | 7.24k | root = json_parser_get_root (deserializer->parser); |
127 | 7.24k | deserializer->reader = json_reader_new (root); |
128 | 7.24k | } |
129 | 8.51k | } |
130 | | |
131 | | static void |
132 | | state_clear (gpointer user_data) |
133 | 1.94M | { |
134 | 1.94M | StateStack *elem = user_data; |
135 | | |
136 | 1.94M | if (elem->type == STACK_OBJECT) { |
137 | 1.79M | g_strfreev (elem->data.object.members); |
138 | 1.79M | g_free (elem->data.object.id); |
139 | 1.79M | } |
140 | 1.94M | } |
141 | | |
142 | | static gboolean |
143 | | advance_stack (TrackerDeserializerJsonLD *deserializer) |
144 | 4.11M | { |
145 | 4.11M | StateStack *elem; |
146 | | |
147 | 4.11M | g_assert (deserializer->state_stack->len > 0); |
148 | | |
149 | 4.11M | elem = &g_array_index (deserializer->state_stack, |
150 | 4.11M | StateStack, |
151 | 4.11M | deserializer->state_stack->len - 1); |
152 | | |
153 | 4.11M | if (elem->type == STACK_ARRAY) { |
154 | 957k | if (elem->data.array.idx >= 0) |
155 | 807k | json_reader_end_element (deserializer->reader); |
156 | | |
157 | 957k | elem->data.array.idx++; |
158 | | |
159 | 957k | if (elem->data.array.idx >= (gint) elem->data.array.elements) |
160 | 141k | return FALSE; |
161 | | |
162 | 815k | return json_reader_read_element (deserializer->reader, |
163 | 815k | elem->data.array.idx); |
164 | 3.15M | } else if (elem->type == STACK_OBJECT) { |
165 | 3.15M | if (elem->data.object.idx >= 0) |
166 | 1.48M | json_reader_end_member (deserializer->reader); |
167 | | |
168 | 3.15M | elem->data.object.idx++; |
169 | | |
170 | 3.15M | if (elem->data.object.members[elem->data.array.idx] == NULL) |
171 | 1.65M | return FALSE; |
172 | | |
173 | 1.50M | return json_reader_read_member (deserializer->reader, |
174 | 1.50M | elem->data.object.members[elem->data.array.idx]); |
175 | 3.15M | } |
176 | | |
177 | 0 | return FALSE; |
178 | 4.11M | } |
179 | | |
180 | | static void |
181 | | push_stack (TrackerDeserializerJsonLD *deserializer, |
182 | | guint state) |
183 | 1.94M | { |
184 | 1.94M | StateStack elem = { 0 }; |
185 | 1.94M | const gchar *id = NULL; |
186 | | |
187 | 1.94M | if (json_reader_is_array (deserializer->reader)) { |
188 | 149k | elem.type = STACK_ARRAY; |
189 | 149k | elem.data.array.idx = -1; |
190 | 149k | elem.data.array.elements = |
191 | 149k | json_reader_count_elements (deserializer->reader); |
192 | 1.79M | } else if (json_reader_is_object (deserializer->reader)) { |
193 | 1.79M | elem.type = STACK_OBJECT; |
194 | 1.79M | elem.data.object.idx = -1; |
195 | 1.79M | elem.data.object.members = |
196 | 1.79M | json_reader_list_members (deserializer->reader); |
197 | | |
198 | 1.79M | elem.data.object.is_graph = |
199 | 1.79M | json_reader_read_member (deserializer->reader, "@graph"); |
200 | 1.79M | json_reader_end_member (deserializer->reader); |
201 | | |
202 | 1.79M | if (json_reader_read_member (deserializer->reader, "@id")) |
203 | 157k | id = json_reader_get_string_value (deserializer->reader); |
204 | 1.79M | json_reader_end_member (deserializer->reader); |
205 | | |
206 | 1.79M | if (id) { |
207 | 156k | TrackerNamespaceManager *namespaces; |
208 | | |
209 | 156k | namespaces = tracker_deserializer_get_namespaces (TRACKER_DESERIALIZER (deserializer)); |
210 | 156k | elem.data.object.id = |
211 | 156k | tracker_namespace_manager_expand_uri (namespaces, id); |
212 | 156k | } |
213 | 1.79M | } else { |
214 | 0 | g_assert_not_reached (); |
215 | 0 | } |
216 | | |
217 | 1.94M | elem.state = state; |
218 | 1.94M | g_array_append_val (deserializer->state_stack, elem); |
219 | 1.94M | deserializer->state = state; |
220 | 1.94M | } |
221 | | |
222 | | static void |
223 | | pop_stack (TrackerDeserializerJsonLD *deserializer) |
224 | 1.91M | { |
225 | 1.91M | StateStack *elem; |
226 | | |
227 | 1.91M | g_assert (deserializer->state_stack->len > 0); |
228 | | |
229 | 1.91M | g_array_set_size (deserializer->state_stack, |
230 | 1.91M | deserializer->state_stack->len - 1); |
231 | | |
232 | 1.91M | if (deserializer->state_stack->len > 0) { |
233 | 1.91M | elem = &g_array_index (deserializer->state_stack, |
234 | 1.91M | StateStack, |
235 | 1.91M | deserializer->state_stack->len - 1); |
236 | 1.91M | deserializer->state = elem->state; |
237 | 1.91M | } else { |
238 | 2.73k | deserializer->state = STATE_FINAL; |
239 | 2.73k | } |
240 | 1.91M | } |
241 | | |
242 | | static guint |
243 | | stack_state (TrackerDeserializerJsonLD *deserializer) |
244 | 419k | { |
245 | 419k | StateStack *elem; |
246 | | |
247 | 419k | g_assert (deserializer->state_stack->len > 0); |
248 | | |
249 | 419k | elem = &g_array_index (deserializer->state_stack, |
250 | 419k | StateStack, |
251 | 419k | deserializer->state_stack->len - 1); |
252 | | |
253 | 419k | return elem->state; |
254 | 419k | } |
255 | | |
256 | | static const gchar * |
257 | | current_member (TrackerDeserializerJsonLD *deserializer) |
258 | 2.46M | { |
259 | 2.46M | StateStack *elem; |
260 | 2.46M | gint i; |
261 | | |
262 | 2.46M | g_assert (deserializer->state_stack->len > 0); |
263 | | |
264 | 2.46M | for (i = (gint) deserializer->state_stack->len - 1; i >= 0; i--) { |
265 | 2.46M | elem = &g_array_index (deserializer->state_stack, |
266 | 2.46M | StateStack, i); |
267 | | |
268 | 2.46M | if (elem->type == STACK_OBJECT) { |
269 | 2.46M | return elem->data.object.idx >= 0 ? |
270 | 2.46M | elem->data.object.members[elem->data.object.idx] : |
271 | 2.46M | NULL; |
272 | 2.46M | } |
273 | 2.46M | } |
274 | | |
275 | 0 | return NULL; |
276 | 2.46M | } |
277 | | |
278 | | static const gchar * |
279 | | current_id (TrackerDeserializerJsonLD *deserializer) |
280 | 2.94M | { |
281 | 2.94M | StateStack *elem; |
282 | 2.94M | gint i; |
283 | | |
284 | 2.94M | g_assert (deserializer->state_stack->len > 0); |
285 | | |
286 | 243M | for (i = (gint) deserializer->state_stack->len - 1; i >= 0; i--) { |
287 | 241M | elem = &g_array_index (deserializer->state_stack, |
288 | 241M | StateStack, i); |
289 | 241M | if (elem->type == STACK_OBJECT && |
290 | 238M | !elem->data.object.is_graph && |
291 | 237M | elem->data.object.id) |
292 | 132k | return elem->data.object.id; |
293 | 241M | } |
294 | | |
295 | 2.81M | return NULL; |
296 | 2.94M | } |
297 | | |
298 | | static const gchar * |
299 | | current_graph (TrackerDeserializerJsonLD *deserializer) |
300 | 128k | { |
301 | 128k | StateStack *elem; |
302 | 128k | gint i; |
303 | | |
304 | 128k | g_assert (deserializer->state_stack->len > 0); |
305 | | |
306 | 175k | for (i = (gint) deserializer->state_stack->len - 1; i >= 0; i--) { |
307 | 174k | elem = &g_array_index (deserializer->state_stack, |
308 | 174k | StateStack, i); |
309 | 174k | if (elem->type == STACK_OBJECT && |
310 | 151k | elem->data.object.is_graph) |
311 | 128k | return elem->data.object.id; |
312 | 174k | } |
313 | | |
314 | 96 | return NULL; |
315 | 128k | } |
316 | | |
317 | | static gchar * |
318 | | object_to_value (TrackerDeserializerJsonLD *deserializer, |
319 | | TrackerNamespaceManager *namespaces, |
320 | | gchar **langtag, |
321 | | TrackerSparqlValueType *value_type) |
322 | 22 | { |
323 | 22 | const gchar *value = NULL, *type = NULL; |
324 | | |
325 | 22 | if (json_reader_read_member (deserializer->reader, "@value")) |
326 | 22 | value = json_reader_get_string_value (deserializer->reader); |
327 | 22 | json_reader_end_member (deserializer->reader); |
328 | | |
329 | 22 | if (json_reader_read_member (deserializer->reader, "@language")) |
330 | 2 | *langtag = g_strdup (json_reader_get_string_value (deserializer->reader)); |
331 | 22 | json_reader_end_member (deserializer->reader); |
332 | | |
333 | 22 | if (json_reader_read_member (deserializer->reader, "@type")) |
334 | 12 | type = json_reader_get_string_value (deserializer->reader); |
335 | 22 | json_reader_end_member (deserializer->reader); |
336 | | |
337 | 22 | if (g_strcmp0 (type, TRACKER_PREFIX_XSD "string") == 0 || |
338 | 22 | g_strcmp0 (type, TRACKER_PREFIX_RDF "langString") == 0) |
339 | 0 | *value_type = TRACKER_SPARQL_VALUE_TYPE_STRING; |
340 | 22 | else if (g_strcmp0 (type, TRACKER_PREFIX_XSD "integer") == 0) |
341 | 0 | *value_type = TRACKER_SPARQL_VALUE_TYPE_INTEGER; |
342 | 22 | else if (g_strcmp0 (type, TRACKER_PREFIX_XSD "boolean") == 0) |
343 | 0 | *value_type = TRACKER_SPARQL_VALUE_TYPE_BOOLEAN; |
344 | 22 | else if (g_strcmp0 (type, TRACKER_PREFIX_XSD "double") == 0) |
345 | 0 | *value_type = TRACKER_SPARQL_VALUE_TYPE_DOUBLE; |
346 | 22 | else if (g_strcmp0 (type, TRACKER_PREFIX_XSD "date") == 0 || |
347 | 22 | g_strcmp0 (type, TRACKER_PREFIX_XSD "dateTime") == 0) |
348 | 0 | *value_type = TRACKER_SPARQL_VALUE_TYPE_DATETIME; |
349 | 22 | else |
350 | 22 | *value_type = TRACKER_SPARQL_VALUE_TYPE_STRING; |
351 | | |
352 | 22 | return g_strdup (value); |
353 | 22 | } |
354 | | |
355 | | static gchar * |
356 | | node_to_value (JsonNode *node, |
357 | | TrackerNamespaceManager *namespaces, |
358 | | TrackerSparqlValueType *value_type) |
359 | 419k | { |
360 | 419k | GValue value = G_VALUE_INIT; |
361 | 419k | GType type; |
362 | 419k | gchar *str = NULL; |
363 | | |
364 | 419k | json_node_get_value (node, &value); |
365 | 419k | type = json_node_get_value_type (node); |
366 | | |
367 | 419k | if (type == G_TYPE_INT64) { |
368 | 895 | *value_type = TRACKER_SPARQL_VALUE_TYPE_INTEGER; |
369 | 895 | str = g_strdup_printf ("%" G_GINT64_FORMAT, g_value_get_int64 (&value)); |
370 | 418k | } else if (type == G_TYPE_STRING) { |
371 | 417k | *value_type = TRACKER_SPARQL_VALUE_TYPE_STRING; |
372 | 417k | str = tracker_namespace_manager_expand_uri (namespaces, g_value_get_string (&value)); |
373 | 417k | } else if (type == G_TYPE_DOUBLE) { |
374 | 380 | gchar buf[G_ASCII_DTOSTR_BUF_SIZE]; |
375 | | |
376 | 380 | g_ascii_dtostr (buf, G_ASCII_DTOSTR_BUF_SIZE, g_value_get_double (&value)); |
377 | 380 | *value_type = TRACKER_SPARQL_VALUE_TYPE_DOUBLE; |
378 | 380 | str = g_strdup (buf); |
379 | 985 | } else if (type == G_TYPE_BOOLEAN) { |
380 | 677 | *value_type = TRACKER_SPARQL_VALUE_TYPE_BOOLEAN; |
381 | 677 | str = g_strdup (g_value_get_boolean (&value) ? "true" : "false"); |
382 | 677 | } else { |
383 | 308 | *value_type = TRACKER_SPARQL_VALUE_TYPE_UNBOUND; |
384 | 308 | } |
385 | | |
386 | 419k | g_value_unset (&value); |
387 | | |
388 | 419k | return str; |
389 | 419k | } |
390 | | |
391 | | static void |
392 | | load_special_key (TrackerDeserializerJsonLD *deserializer, |
393 | | const gchar *key) |
394 | 364 | { |
395 | 364 | const gchar *value; |
396 | | |
397 | 364 | if (json_reader_read_member (deserializer->reader, key)) { |
398 | 353 | value = json_reader_get_string_value (deserializer->reader); |
399 | | |
400 | 353 | if (g_strcmp0 (key, "@language") == 0) { |
401 | 195 | g_clear_pointer (&deserializer->default_lang, g_free); |
402 | 195 | deserializer->default_lang = g_strdup (value); |
403 | 195 | } |
404 | 353 | } |
405 | | |
406 | 364 | json_reader_end_member (deserializer->reader); |
407 | 364 | } |
408 | | |
409 | | static void |
410 | | load_context (TrackerDeserializerJsonLD *deserializer) |
411 | 2.11M | { |
412 | 2.11M | TrackerNamespaceManager *namespaces; |
413 | | |
414 | 2.11M | namespaces = tracker_deserializer_get_namespaces (TRACKER_DESERIALIZER (deserializer)); |
415 | | |
416 | 2.11M | if (json_reader_read_member (deserializer->reader, "@context")) { |
417 | 661 | gchar **members = json_reader_list_members (deserializer->reader); |
418 | 661 | guint i; |
419 | | |
420 | 2.16k | for (i = 0; members && members[i] != NULL; i++) { |
421 | 1.50k | if (members[i][0] == '@') { |
422 | 364 | load_special_key (deserializer, members[i]); |
423 | 364 | continue; |
424 | 364 | } |
425 | | |
426 | 1.13k | if (tracker_namespace_manager_lookup_prefix (namespaces, members[i])) |
427 | 565 | continue; |
428 | | |
429 | 571 | if (json_reader_read_member (deserializer->reader, members[i])) { |
430 | 403 | const gchar *expanded = json_reader_get_string_value (deserializer->reader); |
431 | 403 | tracker_namespace_manager_add_prefix (namespaces, members[i], expanded); |
432 | 403 | } |
433 | | |
434 | 571 | json_reader_end_member (deserializer->reader); |
435 | 571 | } |
436 | | |
437 | 661 | g_strfreev (members); |
438 | 661 | } |
439 | | |
440 | 2.11M | json_reader_end_member (deserializer->reader); |
441 | 2.11M | } |
442 | | |
443 | | static void |
444 | | forward_state_for_value (TrackerDeserializerJsonLD *deserializer) |
445 | 1.41M | { |
446 | 1.41M | if (json_reader_is_object (deserializer->reader)) { |
447 | 993k | if (json_reader_read_member (deserializer->reader, "@value")) { |
448 | 22 | json_reader_end_member (deserializer->reader); |
449 | 22 | deserializer->state = STATE_VALUE_AS_OBJECT; |
450 | 993k | } else { |
451 | 993k | json_reader_end_member (deserializer->reader); |
452 | 993k | push_stack (deserializer, STATE_PROPERTIES); |
453 | 993k | deserializer->state = STATE_MAYBE_GRAPH; |
454 | 993k | } |
455 | 993k | } else { |
456 | 419k | deserializer->state = STATE_VALUE; |
457 | 419k | } |
458 | 1.41M | } |
459 | | |
/* Performs a single step of the JSON-LD state machine.
 *
 * Depending on the current state this descends into or leaves a JSON
 * container, moves to the next member/element, or fills in the current
 * predicate/object and sets has_row to signal that a row is ready.
 *
 * Returns: TRUE while the state stack is non-empty after the step.
 * FALSE is returned with @error set on structural errors, and without
 * @error once the stack is empty.
 */
static gboolean
forward_state (TrackerDeserializerJsonLD  *deserializer,
               GError                    **error)
{
	TrackerNamespaceManager *namespaces;
	const gchar *member;

	namespaces = tracker_deserializer_get_namespaces (TRACKER_DESERIALIZER (deserializer));

	switch (deserializer->state) {
	case STATE_INITIAL:
		/* The root is either a list of objects or a single object;
		 * any other root leaves the stack empty.
		 */
		if (json_reader_is_array (deserializer->reader)) {
			push_stack (deserializer, STATE_ROOT_LIST);
		} else if (json_reader_is_object (deserializer->reader)) {
			push_stack (deserializer, STATE_PROPERTIES);
			deserializer->state = STATE_MAYBE_GRAPH;
		}
		break;
	case STATE_ROOT_LIST:
		if (!advance_stack (deserializer)) {
			pop_stack (deserializer);
			break;
		}

		/* Every element of the root list must be an object */
		if (json_reader_is_object (deserializer->reader)) {
			push_stack (deserializer, STATE_MAYBE_GRAPH);
		} else {
			g_set_error (error,
			             TRACKER_SPARQL_ERROR,
			             TRACKER_SPARQL_ERROR_PARSE,
			             "Expected graph or resource object");
			return FALSE;
		}
		break;
	case STATE_MAYBE_GRAPH:
		load_context (deserializer);

		if (json_reader_read_member (deserializer->reader, "@graph")) {
			/* Graph object: the reader stays inside "@graph" while
			 * its array is iterated; the member is closed in
			 * STATE_OBJECT_LIST once the array is exhausted.
			 */
			g_clear_pointer (&deserializer->cur_graph, g_free);
			deserializer->cur_graph = g_strdup (current_graph (deserializer));

			if (json_reader_is_array (deserializer->reader)) {
				push_stack (deserializer, STATE_OBJECT_LIST);
			} else {
				g_set_error (error,
				             TRACKER_SPARQL_ERROR,
				             TRACKER_SPARQL_ERROR_PARSE,
				             "Expected resource list");
				return FALSE;
			}
		} else {
			/* Plain resource object: it becomes the subject */
			json_reader_end_member (deserializer->reader);
			g_clear_pointer (&deserializer->cur_subject, g_free);
			deserializer->cur_subject = g_strdup (current_id (deserializer));
			deserializer->state = STATE_PROPERTIES;
		}
		break;
	case STATE_OBJECT_LIST:
		if (!advance_stack (deserializer)) {
			/* Pop the graph array, close manually the @graph
			 * member, and pop the graph object too
			 */
			pop_stack (deserializer);
			json_reader_end_member (deserializer->reader);
			pop_stack (deserializer);
			break;
		}

		if (json_reader_is_object (deserializer->reader)) {
			push_stack (deserializer, STATE_PROPERTIES);
			deserializer->state = STATE_MAYBE_GRAPH;
		} else {
			g_set_error (error,
			             TRACKER_SPARQL_ERROR,
			             TRACKER_SPARQL_ERROR_PARSE,
			             "Expected resource object");
			return FALSE;
		}
		break;
	case STATE_PROPERTIES:
		if (!advance_stack (deserializer)) {
			pop_stack (deserializer);

			if (deserializer->state == STATE_PROPERTIES ||
			    deserializer->state == STATE_VALUE_LIST) {
				gchar *nested_object_id;

				/* The state popped belonged to a nested object,
				 * switch subject/predicate back to the parent
				 * object, and finalize the property that defined it.
				 */
				nested_object_id = g_steal_pointer (&deserializer->cur_subject);

				deserializer->cur_subject = g_strdup (current_id (deserializer));
				g_clear_pointer (&deserializer->cur_predicate, g_free);
				deserializer->cur_predicate =
					tracker_namespace_manager_expand_uri (namespaces, current_member (deserializer));
				g_clear_pointer (&deserializer->cur_object, g_free);
				g_clear_pointer (&deserializer->cur_object_lang, g_free);
				deserializer->cur_object = nested_object_id;
				deserializer->object_type = TRACKER_SPARQL_VALUE_TYPE_STRING;
				deserializer->has_row = TRUE;
			}

			break;
		}

		member = current_member (deserializer);
		g_clear_pointer (&deserializer->cur_predicate, g_free);

		/* "@type" maps to rdf:type; every other "@" keyword is
		 * skipped here and produces no row.
		 */
		if (g_strcmp0 (member, "@type") == 0)
			deserializer->cur_predicate = g_strdup (TRACKER_PREFIX_RDF "type");
		else if (member[0] != '@')
			deserializer->cur_predicate = tracker_namespace_manager_expand_uri (namespaces, member);
		else
			break;

		if (json_reader_is_array (deserializer->reader))
			push_stack (deserializer, STATE_VALUE_LIST);
		else
			forward_state_for_value (deserializer);
		break;
	case STATE_VALUE_LIST:
		if (!advance_stack (deserializer)) {
			pop_stack (deserializer);
			break;
		}

		forward_state_for_value (deserializer);
		break;
	case STATE_VALUE_AS_OBJECT:
		g_clear_pointer (&deserializer->cur_object, g_free);
		g_clear_pointer (&deserializer->cur_object_lang, g_free);
		deserializer->cur_object = object_to_value (deserializer,
		                                            namespaces,
		                                            &deserializer->cur_object_lang,
		                                            &deserializer->object_type);
		deserializer->has_row = TRUE;

		/* Resume iterating the container that holds this value */
		deserializer->state = stack_state (deserializer);
		break;
	case STATE_VALUE:
		g_clear_pointer (&deserializer->cur_object, g_free);
		g_clear_pointer (&deserializer->cur_object_lang, g_free);
		deserializer->cur_object = node_to_value (json_reader_get_value (deserializer->reader),
		                                          namespaces,
		                                          &deserializer->object_type);
		deserializer->has_row = TRUE;

		/* Resume iterating the container that holds this value */
		deserializer->state = stack_state (deserializer);
		break;
	case STATE_FINAL:
		break;
	}

	return deserializer->state_stack->len > 0;
}
617 | | |
618 | | static TrackerSparqlValueType |
619 | | tracker_deserializer_json_ld_get_value_type (TrackerSparqlCursor *cursor, |
620 | | gint column) |
621 | 7.17M | { |
622 | 7.17M | TrackerDeserializerJsonLD *deserializer = |
623 | 7.17M | TRACKER_DESERIALIZER_JSON_LD (cursor); |
624 | | |
625 | 7.17M | switch (column) { |
626 | 2.83M | case TRACKER_RDF_COL_SUBJECT: |
627 | 2.83M | if (!deserializer->cur_subject) |
628 | 0 | return TRACKER_SPARQL_VALUE_TYPE_UNBOUND; |
629 | 2.83M | else if (strncmp (deserializer->cur_subject, "_:", 2) == 0) |
630 | 2.61M | return TRACKER_SPARQL_VALUE_TYPE_BLANK_NODE; |
631 | 221k | else |
632 | 221k | return TRACKER_SPARQL_VALUE_TYPE_URI; |
633 | 0 | break; |
634 | 1.38M | case TRACKER_RDF_COL_PREDICATE: |
635 | 1.38M | if (!deserializer->cur_predicate) |
636 | 202 | return TRACKER_SPARQL_VALUE_TYPE_UNBOUND; |
637 | 1.38M | else |
638 | 1.38M | return TRACKER_SPARQL_VALUE_TYPE_URI; |
639 | 0 | break; |
640 | 1.37M | case TRACKER_RDF_COL_OBJECT: |
641 | 1.37M | if (!deserializer->cur_object) |
642 | 0 | return TRACKER_SPARQL_VALUE_TYPE_UNBOUND; |
643 | 1.37M | else |
644 | 1.37M | return deserializer->object_type; |
645 | 0 | break; |
646 | 1.57M | case TRACKER_RDF_COL_GRAPH: |
647 | 1.57M | if (!deserializer->cur_graph) |
648 | 1.19M | return TRACKER_SPARQL_VALUE_TYPE_UNBOUND; |
649 | 381k | else |
650 | 381k | return TRACKER_SPARQL_VALUE_TYPE_URI; |
651 | 0 | break; |
652 | 0 | default: |
653 | 0 | return TRACKER_SPARQL_VALUE_TYPE_UNBOUND; |
654 | 7.17M | } |
655 | 7.17M | } |
656 | | |
657 | | static const gchar * |
658 | | tracker_deserializer_json_ld_get_string (TrackerSparqlCursor *cursor, |
659 | | gint column, |
660 | | const gchar **langtag, |
661 | | glong *length) |
662 | 6.90M | { |
663 | 6.90M | TrackerDeserializerJsonLD *deserializer = |
664 | 6.90M | TRACKER_DESERIALIZER_JSON_LD (cursor); |
665 | 6.90M | const gchar *str = NULL; |
666 | | |
667 | 6.90M | if (length) |
668 | 0 | *length = 0; |
669 | 6.90M | if (langtag) |
670 | 1.38M | *langtag = NULL; |
671 | | |
672 | 6.90M | switch (column) { |
673 | 1.38M | case TRACKER_RDF_COL_SUBJECT: |
674 | 1.38M | str = deserializer->cur_subject; |
675 | 1.38M | break; |
676 | 1.38M | case TRACKER_RDF_COL_PREDICATE: |
677 | 1.38M | str = deserializer->cur_predicate; |
678 | 1.38M | break; |
679 | 2.76M | case TRACKER_RDF_COL_OBJECT: |
680 | 2.76M | if (langtag) { |
681 | 1.38M | if (deserializer->cur_object_lang) |
682 | 2 | *langtag = deserializer->cur_object_lang; |
683 | 1.38M | else |
684 | 1.38M | *langtag = deserializer->default_lang; |
685 | 1.38M | } |
686 | | |
687 | 2.76M | str = deserializer->cur_object; |
688 | 2.76M | break; |
689 | 1.38M | case TRACKER_RDF_COL_GRAPH: |
690 | 1.38M | str = deserializer->cur_graph; |
691 | 1.38M | break; |
692 | 0 | default: |
693 | 0 | break; |
694 | 6.90M | } |
695 | | |
696 | 6.90M | if (length && str) |
697 | 0 | *length = strlen (str); |
698 | | |
699 | 6.90M | return str; |
700 | 6.90M | } |
701 | | |
702 | | static gboolean |
703 | | tracker_deserializer_json_ld_next (TrackerSparqlCursor *cursor, |
704 | | GCancellable *cancellable, |
705 | | GError **error) |
706 | 1.38M | { |
707 | 1.38M | TrackerDeserializerJsonLD *deserializer = |
708 | 1.38M | TRACKER_DESERIALIZER_JSON_LD (cursor); |
709 | | |
710 | 1.38M | if (deserializer->init_error) { |
711 | 1.27k | GError *init_error; |
712 | | |
713 | 1.27k | init_error = g_steal_pointer (&deserializer->init_error); |
714 | 1.27k | g_propagate_error (error, init_error); |
715 | 1.27k | return FALSE; |
716 | 1.27k | } |
717 | | |
718 | 1.38M | deserializer->has_row = FALSE; |
719 | | |
720 | 8.03M | while (!deserializer->has_row) { |
721 | 6.65M | GError *inner_error = NULL; |
722 | | |
723 | 6.65M | if (g_cancellable_set_error_if_cancelled (cancellable, error)) |
724 | 0 | return FALSE; |
725 | | |
726 | 6.65M | if (!forward_state (deserializer, &inner_error)) { |
727 | 2.92k | if (inner_error) { |
728 | 68 | g_propagate_error (error, inner_error); |
729 | 2.86k | } else { |
730 | 2.86k | const GError *reader_error; |
731 | | |
732 | 2.86k | reader_error = json_reader_get_error (deserializer->reader); |
733 | 2.86k | if (error && reader_error) |
734 | 28 | *error = g_error_copy (reader_error); |
735 | 2.86k | } |
736 | | |
737 | 2.92k | return FALSE; |
738 | 2.92k | } |
739 | 6.65M | } |
740 | | |
741 | 1.38M | if (!deserializer->cur_subject) |
742 | 1.30M | deserializer->cur_subject = g_strdup_printf ("_:%d", deserializer->blank_node_idx++); |
743 | | |
744 | 1.38M | return TRUE; |
745 | 1.38M | } |
746 | | |
747 | | static void |
748 | | tracker_deserializer_json_ld_close (TrackerSparqlCursor *cursor) |
749 | 8.51k | { |
750 | 8.51k | } |
751 | | |
752 | | gboolean |
753 | | tracker_deserializer_json_ld_get_parser_location (TrackerDeserializer *deserializer, |
754 | | const char **name, |
755 | | goffset *line_no, |
756 | | goffset *column_no) |
757 | 5.68k | { |
758 | 5.68k | if (name) |
759 | 5.68k | *name = tracker_deserializer_get_name (deserializer); |
760 | | |
761 | 5.68k | return FALSE; |
762 | 5.68k | } |
763 | | |
764 | | static void |
765 | | tracker_deserializer_json_ld_class_init (TrackerDeserializerJsonLDClass *klass) |
766 | 1 | { |
767 | 1 | GObjectClass *object_class = G_OBJECT_CLASS (klass); |
768 | 1 | TrackerSparqlCursorClass *cursor_class = |
769 | 1 | TRACKER_SPARQL_CURSOR_CLASS (klass); |
770 | 1 | TrackerDeserializerClass *deserializer_class = |
771 | 1 | TRACKER_DESERIALIZER_CLASS (klass); |
772 | | |
773 | 1 | object_class->finalize = tracker_deserializer_json_ld_finalize; |
774 | 1 | object_class->constructed = tracker_deserializer_json_ld_constructed; |
775 | | |
776 | 1 | cursor_class->get_value_type = tracker_deserializer_json_ld_get_value_type; |
777 | 1 | cursor_class->get_string = tracker_deserializer_json_ld_get_string; |
778 | 1 | cursor_class->next = tracker_deserializer_json_ld_next; |
779 | 1 | cursor_class->close = tracker_deserializer_json_ld_close; |
780 | | |
781 | 1 | deserializer_class->get_parser_location = |
782 | 1 | tracker_deserializer_json_ld_get_parser_location; |
783 | 1 | } |
784 | | |
785 | | static void |
786 | | tracker_deserializer_json_ld_init (TrackerDeserializerJsonLD *deserializer) |
787 | 8.51k | { |
788 | 8.51k | deserializer->parser = json_parser_new (); |
789 | 8.51k | deserializer->state_stack = g_array_new (FALSE, FALSE, sizeof (StateStack)); |
790 | 8.51k | g_array_set_clear_func (deserializer->state_stack, state_clear); |
791 | 8.51k | } |
792 | | |
793 | | TrackerSparqlCursor * |
794 | | tracker_deserializer_json_ld_new (GInputStream *stream, |
795 | | TrackerNamespaceManager *namespaces) |
796 | 8.51k | { |
797 | 8.51k | return g_object_new (TRACKER_TYPE_DESERIALIZER_JSON_LD, |
798 | 8.51k | "stream", stream, |
799 | 8.51k | "namespace-manager", namespaces, |
800 | | NULL); |
801 | 8.51k | } |