Coverage Report

Created: 2025-07-12 06:31

/src/tinysparql/src/libtinysparql/tracker-deserializer-xml.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (C) 2022, Red Hat Inc.
3
 *
4
 * This library is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
6
 * License as published by the Free Software Foundation; either
7
 * version 2.1 of the License, or (at your option) any later version.
8
 *
9
 * This library is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU Lesser General Public
15
 * License along with this library; if not, write to the
16
 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17
 * Boston, MA  02110-1301, USA.
18
 *
19
 * Author: Carlos Garnacho <carlosg@gnome.org>
20
 */
21
22
/* Deserialization to cursors for the XML format defined at:
23
 *   https://www.w3.org/TR/2013/REC-rdf-sparql-XMLres-20130321/
24
 */
25
26
#include "config.h"
27
28
#include "tracker-deserializer-xml.h"
29
30
#include <libxml/xmlreader.h>
31
32
typedef struct {
33
  TrackerSparqlValueType type;
34
  xmlChar *str;
35
  xmlChar *langtag;
36
} ColumnData;
37
38
struct _TrackerDeserializerXml {
39
  TrackerDeserializer parent_instance;
40
  xmlTextReaderPtr reader;
41
  GPtrArray *columns;
42
  GPtrArray *column_names;
43
  GError *error;
44
  gboolean started;
45
};
46
47
G_DEFINE_TYPE (TrackerDeserializerXml,
48
               tracker_deserializer_xml,
49
               TRACKER_TYPE_DESERIALIZER)
50
51
static ColumnData *
52
column_new (TrackerSparqlValueType  type,
53
            xmlChar                *str,
54
            xmlChar                *langtag)
55
0
{
56
0
  ColumnData *col;
57
58
0
  col = g_slice_new0 (ColumnData);
59
0
  col->type = type;
60
0
  col->str = str;
61
0
  col->langtag = langtag;
62
63
0
  return col;
64
0
}
65
66
static void
67
column_free (gpointer data)
68
0
{
69
0
  ColumnData *col = data;
70
71
0
  xmlFree (col->str);
72
0
  xmlFree (col->langtag);
73
0
  g_slice_free (ColumnData, col);
74
0
}
75
76
static void
77
tracker_deserializer_xml_finalize (GObject *object)
78
0
{
79
0
  TrackerDeserializerXml *deserializer =
80
0
    TRACKER_DESERIALIZER_XML (object);
81
82
0
  g_clear_pointer (&deserializer->reader, xmlFreeTextReader);
83
0
  g_ptr_array_unref (deserializer->columns);
84
85
0
  G_OBJECT_CLASS (tracker_deserializer_xml_parent_class)->finalize (object);
86
0
}
87
88
static int
89
stream_read (gpointer  context,
90
             gchar    *buf,
91
             int       len)
92
0
{
93
0
  GInputStream *stream = context;
94
95
0
  return g_input_stream_read (stream, buf, len, NULL, NULL);
96
0
}
97
98
static int
99
stream_close (gpointer context)
100
0
{
101
0
  GInputStream *stream = context;
102
103
0
  return g_input_stream_close (stream, NULL, NULL) ? 0 : -1;
104
0
}
105
106
static void
107
error_handler (gpointer                 user_data,
108
               const gchar             *msg,
109
               xmlParserSeverities      severity,
110
               xmlTextReaderLocatorPtr  locator)
111
0
{
112
0
  TrackerDeserializerXml *deserializer = user_data;
113
114
0
  deserializer->error = g_error_new (TRACKER_SPARQL_ERROR,
115
0
                                     TRACKER_SPARQL_ERROR_PARSE,
116
0
                                     "Could not parse XML response: %s",
117
0
                                     msg);
118
0
}
119
120
static gboolean
121
reader_in_element (TrackerDeserializerXml *deserializer,
122
                   const gchar            *name,
123
                   int                     depth)
124
0
{
125
0
  return (xmlTextReaderNodeType (deserializer->reader) == XML_READER_TYPE_ELEMENT &&
126
0
          g_strcmp0 ((gchar *) xmlTextReaderConstName (deserializer->reader), name) == 0 &&
127
0
          xmlTextReaderDepth (deserializer->reader) == depth);
128
0
}
129
130
static gboolean
131
parse_head (TrackerDeserializerXml  *deserializer,
132
            GError                 **error)
133
0
{
134
0
  gboolean seen_link = FALSE;
135
136
0
  if (xmlTextReaderRead(deserializer->reader) <= 0 ||
137
0
      !reader_in_element (deserializer, "head", 1))
138
0
    goto error;
139
140
0
  while (xmlTextReaderRead (deserializer->reader) > 0) {
141
0
    if (xmlTextReaderNodeType (deserializer->reader) == XML_READER_TYPE_END_ELEMENT)
142
0
      break;
143
144
0
    if (reader_in_element (deserializer, "variable", 2)) {
145
0
      xmlChar *name;
146
147
0
      if (seen_link) {
148
0
        g_set_error (error,
149
0
                     TRACKER_SPARQL_ERROR,
150
0
                     TRACKER_SPARQL_ERROR_PARSE,
151
0
                     "Wrong XML format, variable node found after link");
152
0
        break;
153
0
      }
154
155
0
      name = xmlTextReaderGetAttribute (deserializer->reader,
156
0
                                        (xmlChar *) "name");
157
0
      g_ptr_array_add (deserializer->column_names, name);
158
0
    } else if (reader_in_element (deserializer, "link", 2)) {
159
      /* We do nothing about extra links in headers, but still
160
       * mandate that these appear after all variable nodes
161
       * as per spec.
162
       */
163
0
      seen_link = TRUE;
164
0
    } else {
165
0
      goto error;
166
0
    }
167
0
  }
168
169
0
  return TRUE;
170
171
0
 error:
172
0
  g_set_error (error,
173
0
               TRACKER_SPARQL_ERROR,
174
0
               TRACKER_SPARQL_ERROR_PARSE,
175
0
               "Wrong XML format, unexpected node '%s'",
176
0
               xmlTextReaderConstName (deserializer->reader));
177
178
0
  return FALSE;
179
0
}
180
181
static void
182
tracker_deserializer_xml_constructed (GObject *object)
183
0
{
184
0
  TrackerDeserializerXml *deserializer =
185
0
    TRACKER_DESERIALIZER_XML (object);
186
0
  GInputStream *stream;
187
188
0
  G_OBJECT_CLASS (tracker_deserializer_xml_parent_class)->constructed (object);
189
190
0
  stream = tracker_deserializer_get_stream (TRACKER_DESERIALIZER (object));
191
192
0
  deserializer->reader = xmlReaderForIO (stream_read,
193
0
                                         stream_close,
194
0
                                         stream,
195
0
                                         NULL, NULL, 0);
196
0
  if (deserializer->reader) {
197
0
    xmlTextReaderSetErrorHandler (deserializer->reader,
198
0
                                  error_handler, deserializer);
199
0
  }
200
201
0
  if (deserializer->reader &&
202
0
      xmlTextReaderRead(deserializer->reader) > 0 &&
203
0
      reader_in_element (deserializer, "sparql", 0)) {
204
0
    parse_head (deserializer, &deserializer->error);
205
0
  } else {
206
0
    g_set_error (&deserializer->error,
207
0
                 TRACKER_SPARQL_ERROR,
208
0
                 TRACKER_SPARQL_ERROR_PARSE,
209
0
                 "Wrong XML format, variable node found after link");
210
0
  }
211
0
}
212
213
static gint
214
tracker_deserializer_xml_get_n_columns (TrackerSparqlCursor  *cursor)
215
0
{
216
0
  TrackerDeserializerXml *deserializer =
217
0
    TRACKER_DESERIALIZER_XML (cursor);
218
219
0
  return deserializer->column_names->len;
220
0
}
221
222
static TrackerSparqlValueType
223
tracker_deserializer_xml_get_value_type (TrackerSparqlCursor  *cursor,
224
                                         gint                  column)
225
0
{
226
0
  TrackerDeserializerXml *deserializer =
227
0
    TRACKER_DESERIALIZER_XML (cursor);
228
0
  ColumnData *col;
229
230
0
  if (column < 0 || column >= (gint) deserializer->columns->len)
231
0
    return TRACKER_SPARQL_VALUE_TYPE_UNBOUND;
232
233
0
  col = g_ptr_array_index (deserializer->columns, column);
234
235
0
  return col->type;
236
0
}
237
238
static const gchar *
239
tracker_deserializer_xml_get_variable_name (TrackerSparqlCursor  *cursor,
240
                                            gint                  column)
241
0
{
242
0
  TrackerDeserializerXml *deserializer =
243
0
    TRACKER_DESERIALIZER_XML (cursor);
244
245
0
  if (column < 0 || column >= (gint) deserializer->column_names->len)
246
0
    return NULL;
247
248
0
  return g_ptr_array_index (deserializer->column_names, column);
249
0
}
250
251
static const gchar *
252
tracker_deserializer_xml_get_string (TrackerSparqlCursor  *cursor,
253
                                     gint                  column,
254
                                     const gchar         **langtag,
255
                                     glong                *length)
256
0
{
257
0
  TrackerDeserializerXml *deserializer =
258
0
    TRACKER_DESERIALIZER_XML (cursor);
259
0
  ColumnData *col;
260
261
0
  if (length)
262
0
    *length = 0;
263
0
  if (langtag)
264
0
    *langtag = NULL;
265
266
0
  if (column < 0 || column >= (gint) deserializer->columns->len)
267
0
    return NULL;
268
269
0
  col = g_ptr_array_index (deserializer->columns, column);
270
271
0
  if (length)
272
0
    *length = strlen ((const gchar *) col->str);
273
0
  if (langtag)
274
0
    *langtag = (const gchar *) col->langtag;
275
276
0
  return (const gchar *) col->str;
277
0
}
278
279
static gboolean
280
maybe_propagate_error (TrackerDeserializerXml  *deserializer,
281
                       GError                 **error)
282
0
{
283
0
  if (deserializer->error) {
284
0
    g_propagate_error (error, deserializer->error);
285
0
    deserializer->error = NULL;
286
0
    return TRUE;
287
0
  }
288
289
0
  return FALSE;
290
0
}
291
292
static gboolean
293
parse_binding_type (TrackerDeserializerXml  *deserializer,
294
                    TrackerSparqlValueType  *type,
295
                    GError                 **error)
296
0
{
297
0
  if (reader_in_element (deserializer, "uri", 4)) {
298
0
    *type = TRACKER_SPARQL_VALUE_TYPE_URI;
299
0
  } else if (reader_in_element (deserializer, "bnode", 4)) {
300
0
    *type = TRACKER_SPARQL_VALUE_TYPE_BLANK_NODE;
301
0
  } else if (reader_in_element (deserializer, "literal", 4)) {
302
0
    xmlChar *datatype;
303
0
    const gchar *suffix;
304
305
0
    datatype = xmlTextReaderGetAttribute (deserializer->reader,
306
0
                                          (xmlChar *) "datatype");
307
308
0
    if (!datatype ||
309
0
        !g_str_has_prefix ((const gchar *) datatype, TRACKER_PREFIX_XSD)) {
310
0
      *type = TRACKER_SPARQL_VALUE_TYPE_STRING;
311
0
      return TRUE;
312
0
    }
313
314
0
    suffix = (const gchar *) &datatype[strlen (TRACKER_PREFIX_XSD)];
315
316
0
    if (g_str_equal (suffix, "byte") ||
317
0
        g_str_equal (suffix, "int") ||
318
0
        g_str_equal (suffix, "integer") ||
319
0
        g_str_equal (suffix, "long"))
320
0
      *type = TRACKER_SPARQL_VALUE_TYPE_INTEGER;
321
0
    else if (g_str_equal (suffix, "decimal") ||
322
0
             g_str_equal (suffix, "double"))
323
0
      *type = TRACKER_SPARQL_VALUE_TYPE_DOUBLE;
324
0
    else if (g_str_equal (suffix, "date") ||
325
0
             g_str_equal (suffix, "dateTime"))
326
0
      *type = TRACKER_SPARQL_VALUE_TYPE_DATETIME;
327
0
    else if (g_str_equal (suffix, "boolean"))
328
0
      *type = TRACKER_SPARQL_VALUE_TYPE_BOOLEAN;
329
0
    else
330
0
      *type = TRACKER_SPARQL_VALUE_TYPE_STRING;
331
332
0
    xmlFree (datatype);
333
0
  } else {
334
0
    g_set_error (error,
335
0
                 TRACKER_SPARQL_ERROR,
336
0
                 TRACKER_SPARQL_ERROR_PARSE,
337
0
                 "Unknown binding type '%s'",
338
0
                 xmlTextReaderConstName (deserializer->reader));
339
0
    return FALSE;
340
0
  }
341
342
0
  return TRUE;
343
0
}
344
345
static gboolean
346
parse_binding (TrackerDeserializerXml  *deserializer,
347
               TrackerSparqlValueType  *type,
348
               xmlChar                **name,
349
               xmlChar                **value,
350
               xmlChar                **langtag,
351
               GError                 **error)
352
0
{
353
0
  xmlChar *binding_name = NULL, *binding_value = NULL, *binding_langtag = NULL;
354
355
0
  if (!reader_in_element (deserializer, "binding", 3))
356
0
    goto error;
357
358
0
  binding_name = xmlTextReaderGetAttribute (deserializer->reader,
359
0
                                            (xmlChar *) "name");
360
361
0
  if (xmlTextReaderRead(deserializer->reader) <= 0)
362
0
    goto error;
363
364
0
  binding_langtag = xmlTextReaderGetAttribute (deserializer->reader,
365
0
                                               (xmlChar *) "xml:lang");
366
367
0
  if (!parse_binding_type (deserializer, type, error))
368
0
    goto error_already_set;
369
370
0
  if (xmlTextReaderRead(deserializer->reader) <= 0)
371
0
    goto error;
372
373
0
  binding_value = xmlTextReaderValue (deserializer->reader);
374
375
  /* End of binding content */
376
0
  if (xmlTextReaderRead(deserializer->reader) <= 0 ||
377
0
      xmlTextReaderNodeType (deserializer->reader) != XML_READER_TYPE_END_ELEMENT)
378
0
    goto error;
379
380
  /* End of binding */
381
0
  if (xmlTextReaderRead(deserializer->reader) <= 0 ||
382
0
      xmlTextReaderNodeType (deserializer->reader) != XML_READER_TYPE_END_ELEMENT)
383
0
    goto error;
384
385
0
  *name = binding_name;
386
0
  *value = binding_value;
387
0
  *langtag = binding_langtag;
388
389
0
  return TRUE;
390
0
 error:
391
0
  g_set_error (error,
392
0
               TRACKER_SPARQL_ERROR,
393
0
               TRACKER_SPARQL_ERROR_PARSE,
394
0
               "Wrong XML format, unexpected node '%s'",
395
0
               xmlTextReaderConstName (deserializer->reader));
396
0
 error_already_set:
397
0
  g_clear_pointer (&binding_name, xmlFree);
398
0
  g_clear_pointer (&binding_value, xmlFree);
399
0
  g_clear_pointer (&binding_langtag, xmlFree);
400
401
0
  return FALSE;
402
0
}
403
404
static gboolean
405
parse_result (TrackerDeserializerXml  *deserializer,
406
              GError                 **error)
407
0
{
408
0
  TrackerSparqlCursor *cursor = TRACKER_SPARQL_CURSOR (deserializer);
409
0
  const gchar *var_name;
410
0
  GHashTable *ht = NULL;
411
0
  gint n_columns, i;
412
413
0
  if (!reader_in_element (deserializer, "result", 2))
414
0
    goto error;
415
416
0
  g_ptr_array_set_size (deserializer->columns, 0);
417
0
  ht = g_hash_table_new_full (g_str_hash, g_str_equal, xmlFree, column_free);
418
419
0
  while (xmlTextReaderRead (deserializer->reader) > 0) {
420
0
    ColumnData *col;
421
0
    xmlChar *name, *value, *langtag;
422
0
    TrackerSparqlValueType type;
423
424
0
    if (xmlTextReaderNodeType (deserializer->reader) == XML_READER_TYPE_END_ELEMENT)
425
0
      break;
426
427
0
    if (!parse_binding (deserializer, &type, &name, &value, &langtag, error))
428
0
      goto error_already_set;
429
430
0
    col = column_new (type, value, langtag);
431
0
    g_hash_table_insert (ht, name, col);
432
0
  }
433
434
0
  if (maybe_propagate_error (deserializer, error))
435
0
    goto error_already_set;
436
437
0
  n_columns = tracker_sparql_cursor_get_n_columns (cursor);
438
439
0
  for (i = 0; i < n_columns; i++) {
440
0
    ColumnData *col;
441
442
0
    var_name = tracker_sparql_cursor_get_variable_name (cursor, i);
443
0
    col = g_hash_table_lookup (ht, var_name);
444
0
    g_hash_table_steal (ht, var_name);
445
0
    if (!col)
446
0
      col = column_new (TRACKER_SPARQL_VALUE_TYPE_UNBOUND, NULL, NULL);
447
448
0
    g_ptr_array_add (deserializer->columns, col);
449
0
  }
450
451
  /* There should be no bindings left */
452
0
  if (g_hash_table_size (ht) > 0) {
453
0
    g_set_error (error,
454
0
                 TRACKER_SPARQL_ERROR,
455
0
                 TRACKER_SPARQL_ERROR_PARSE,
456
0
                 "Wrong XML format, unexpected additional bindings");
457
0
    return FALSE;
458
0
  }
459
460
0
  return TRUE;
461
462
0
 error:
463
0
  g_set_error (error,
464
0
               TRACKER_SPARQL_ERROR,
465
0
               TRACKER_SPARQL_ERROR_PARSE,
466
0
               "Wrong XML format, unexpected node '%s'",
467
0
               xmlTextReaderConstName (deserializer->reader));
468
0
 error_already_set:
469
0
  g_clear_pointer (&ht, g_hash_table_unref);
470
0
  return FALSE;
471
0
}
472
473
static gboolean
474
tracker_deserializer_xml_next (TrackerSparqlCursor  *cursor,
475
                               GCancellable         *cancellable,
476
                               GError              **error)
477
0
{
478
0
  TrackerDeserializerXml *deserializer =
479
0
    TRACKER_DESERIALIZER_XML (cursor);
480
481
0
  if (g_cancellable_set_error_if_cancelled (cancellable, error))
482
0
    return FALSE;
483
484
0
  g_ptr_array_set_size (deserializer->columns, 0);
485
486
0
 again:
487
0
  if (xmlTextReaderRead(deserializer->reader) <= 0) {
488
0
    if (!maybe_propagate_error (deserializer, error)) {
489
0
      g_set_error (error,
490
0
                   TRACKER_SPARQL_ERROR,
491
0
                   TRACKER_SPARQL_ERROR_PARSE,
492
0
                   "Unexpected termination of XML document");
493
0
    }
494
0
    return FALSE;
495
0
  }
496
497
0
  if (!deserializer->started) {
498
0
    if (reader_in_element (deserializer, "results", 1)) {
499
0
      deserializer->started = TRUE;
500
      /* We want to read the next element, the first <result> */
501
0
      goto again;
502
0
    } else if (reader_in_element (deserializer, "boolean", 1)) {
503
0
      ColumnData *col;
504
0
      xmlChar *content;
505
506
0
      content = xmlTextReaderValue (deserializer->reader);
507
0
      col = column_new (TRACKER_SPARQL_VALUE_TYPE_BOOLEAN, content, NULL);
508
0
      g_ptr_array_add (deserializer->columns, col);
509
0
    } else {
510
0
      g_set_error (error,
511
0
                   TRACKER_SPARQL_ERROR,
512
0
                   TRACKER_SPARQL_ERROR_PARSE,
513
0
                   "Wrong XML format, unexpected node '%s'",
514
0
                   xmlTextReaderConstName (deserializer->reader));
515
0
      return FALSE;
516
0
    }
517
0
  }
518
519
  /* We've reached the end of results */
520
0
  if (xmlTextReaderNodeType (deserializer->reader) == XML_READER_TYPE_END_ELEMENT)
521
0
    return FALSE;
522
523
0
  return parse_result (deserializer, error);
524
0
}
525
526
static void
527
tracker_deserializer_xml_next_async (TrackerSparqlCursor  *cursor,
528
                                     GCancellable         *cancellable,
529
                                     GAsyncReadyCallback   cb,
530
                                     gpointer              user_data)
531
0
{
532
0
  GError *error = NULL;
533
0
  GTask *task;
534
535
0
  task = g_task_new (cursor, cancellable, cb, user_data);
536
537
0
  if (tracker_sparql_cursor_next (cursor, cancellable, &error))
538
0
    g_task_return_boolean (task, TRUE);
539
0
  else if (!error)
540
0
    g_task_return_boolean (task, FALSE);
541
0
  else
542
0
    g_task_return_error (task, error);
543
544
0
  g_object_unref (task);
545
0
}
546
547
static gboolean
548
tracker_deserializer_xml_next_finish (TrackerSparqlCursor  *cursor,
549
                                      GAsyncResult         *res,
550
                                      GError              **error)
551
0
{
552
0
  return g_task_propagate_boolean (G_TASK (res), error);
553
0
}
554
555
static void
556
tracker_deserializer_xml_close (TrackerSparqlCursor *cursor)
557
0
{
558
0
  TrackerDeserializerXml *deserializer =
559
0
    TRACKER_DESERIALIZER_XML (cursor);
560
561
0
  xmlTextReaderClose (deserializer->reader);
562
563
0
  TRACKER_SPARQL_CURSOR_CLASS (tracker_deserializer_xml_parent_class)->close (cursor);
564
0
}
565
566
gboolean
567
tracker_deserializer_xml_get_parser_location (TrackerDeserializer *deserializer,
568
                                              goffset             *line_no,
569
                                              goffset             *column_no)
570
0
{
571
0
  TrackerDeserializerXml *deserializer_xml =
572
0
    TRACKER_DESERIALIZER_XML (deserializer);
573
574
0
  *line_no = xmlTextReaderGetParserLineNumber (deserializer_xml->reader);
575
0
  *column_no = xmlTextReaderGetParserColumnNumber (deserializer_xml->reader);
576
577
0
  return TRUE;
578
0
}
579
580
static void
581
tracker_deserializer_xml_class_init (TrackerDeserializerXmlClass *klass)
582
0
{
583
0
  GObjectClass *object_class = G_OBJECT_CLASS (klass);
584
0
  TrackerSparqlCursorClass *cursor_class =
585
0
    TRACKER_SPARQL_CURSOR_CLASS (klass);
586
0
  TrackerDeserializerClass *deserializer_class =
587
0
    TRACKER_DESERIALIZER_CLASS (klass);
588
589
0
  object_class->finalize = tracker_deserializer_xml_finalize;
590
0
  object_class->constructed = tracker_deserializer_xml_constructed;
591
592
0
  cursor_class->get_n_columns = tracker_deserializer_xml_get_n_columns;
593
0
  cursor_class->get_value_type = tracker_deserializer_xml_get_value_type;
594
0
  cursor_class->get_variable_name = tracker_deserializer_xml_get_variable_name;
595
0
  cursor_class->get_string = tracker_deserializer_xml_get_string;
596
0
  cursor_class->next = tracker_deserializer_xml_next;
597
0
  cursor_class->next_async = tracker_deserializer_xml_next_async;
598
0
  cursor_class->next_finish = tracker_deserializer_xml_next_finish;
599
0
  cursor_class->close = tracker_deserializer_xml_close;
600
601
0
  deserializer_class->get_parser_location =
602
0
    tracker_deserializer_xml_get_parser_location;
603
0
}
604
605
static void
606
tracker_deserializer_xml_init (TrackerDeserializerXml *deserializer)
607
0
{
608
0
  deserializer->columns = g_ptr_array_new_with_free_func (column_free);
609
0
  deserializer->column_names = g_ptr_array_new_with_free_func (xmlFree);
610
0
}
611
612
TrackerSparqlCursor *
613
tracker_deserializer_xml_new (GInputStream            *stream,
614
                              TrackerNamespaceManager *namespaces)
615
0
{
616
0
  return g_object_new (TRACKER_TYPE_DESERIALIZER_XML,
617
0
                       "stream", stream,
618
0
                       "namespace-manager", namespaces,
619
0
                       NULL);
620
0
}