Coverage Report

Created: 2025-08-12 06:43

/src/postgres/src/backend/utils/adt/xml.c
Line
Count
Source (jump to first uncovered line)
1
/*-------------------------------------------------------------------------
2
 *
3
 * xml.c
4
 *    XML data type support.
5
 *
6
 *
7
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8
 * Portions Copyright (c) 1994, Regents of the University of California
9
 *
10
 * src/backend/utils/adt/xml.c
11
 *
12
 *-------------------------------------------------------------------------
13
 */
14
15
/*
16
 * Generally, XML type support is only available when libxml use was
17
 * configured during the build.  But even if that is not done, the
18
 * type and all the functions are available, but most of them will
19
 * fail.  For one thing, this avoids having to manage variant catalog
20
 * installations.  But it also has nice effects such as that you can
21
 * dump a database containing XML type data even if the server is not
22
 * linked with libxml.  Thus, make sure xml_out() works even if nothing
23
 * else does.
24
 */
25
26
/*
27
 * Notes on memory management:
28
 *
29
 * Sometimes libxml allocates global structures in the hope that it can reuse
30
 * them later on.  This makes it impractical to change the xmlMemSetup
31
 * functions on-the-fly; that is likely to lead to trying to pfree() chunks
32
 * allocated with malloc() or vice versa.  Since libxml might be used by
33
 * loadable modules, eg libperl, our only safe choices are to change the
34
 * functions at postmaster/backend launch or not at all.  Since we'd rather
35
 * not activate libxml in sessions that might never use it, the latter choice
36
 * is the preferred one.  However, for debugging purposes it can be awfully
37
 * handy to constrain libxml's allocations to be done in a specific palloc
38
 * context, where they're easy to track.  Therefore there is code here that
39
 * can be enabled in debug builds to redirect libxml's allocations into a
40
 * special context LibxmlContext.  It's not recommended to turn this on in
41
 * a production build because of the possibility of bad interactions with
42
 * external modules.
43
 */
44
/* #define USE_LIBXMLCONTEXT */
45
46
#include "postgres.h"
47
48
#ifdef USE_LIBXML
49
#include <libxml/chvalid.h>
50
#include <libxml/entities.h>
51
#include <libxml/parser.h>
52
#include <libxml/parserInternals.h>
53
#include <libxml/tree.h>
54
#include <libxml/uri.h>
55
#include <libxml/xmlerror.h>
56
#include <libxml/xmlsave.h>
57
#include <libxml/xmlversion.h>
58
#include <libxml/xmlwriter.h>
59
#include <libxml/xpath.h>
60
#include <libxml/xpathInternals.h>
61
62
/*
63
 * We used to check for xmlStructuredErrorContext via a configure test; but
64
 * that doesn't work on Windows, so instead use this grottier method of
65
 * testing the library version number.
66
 */
67
#if LIBXML_VERSION >= 20704
68
#define HAVE_XMLSTRUCTUREDERRORCONTEXT 1
69
#endif
70
71
/*
72
 * libxml2 2.12 decided to insert "const" into the error handler API.
73
 */
74
#if LIBXML_VERSION >= 21200
75
#define PgXmlErrorPtr const xmlError *
76
#else
77
#define PgXmlErrorPtr xmlErrorPtr
78
#endif
79
80
#endif              /* USE_LIBXML */
81
82
#include "access/htup_details.h"
83
#include "access/table.h"
84
#include "catalog/namespace.h"
85
#include "catalog/pg_class.h"
86
#include "catalog/pg_type.h"
87
#include "commands/dbcommands.h"
88
#include "executor/spi.h"
89
#include "executor/tablefunc.h"
90
#include "fmgr.h"
91
#include "lib/stringinfo.h"
92
#include "libpq/pqformat.h"
93
#include "mb/pg_wchar.h"
94
#include "miscadmin.h"
95
#include "nodes/execnodes.h"
96
#include "nodes/miscnodes.h"
97
#include "nodes/nodeFuncs.h"
98
#include "utils/array.h"
99
#include "utils/builtins.h"
100
#include "utils/date.h"
101
#include "utils/datetime.h"
102
#include "utils/lsyscache.h"
103
#include "utils/rel.h"
104
#include "utils/syscache.h"
105
#include "utils/xml.h"
106
107
108
/* GUC variables */
109
int     xmlbinary = XMLBINARY_BASE64;
110
int     xmloption = XMLOPTION_CONTENT;
111
112
#ifdef USE_LIBXML
113
114
/* random number to identify PgXmlErrorContext */
115
#define ERRCXT_MAGIC  68275028
116
117
struct PgXmlErrorContext
118
{
119
  int     magic;
120
  /* strictness argument passed to pg_xml_init */
121
  PgXmlStrictness strictness;
122
  /* current error status and accumulated message, if any */
123
  bool    err_occurred;
124
  StringInfoData err_buf;
125
  /* previous libxml error handling state (saved by pg_xml_init) */
126
  xmlStructuredErrorFunc saved_errfunc;
127
  void     *saved_errcxt;
128
  /* previous libxml entity handler (saved by pg_xml_init) */
129
  xmlExternalEntityLoader saved_entityfunc;
130
};
131
132
static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID,
133
                       xmlParserCtxtPtr ctxt);
134
static void xml_errsave(Node *escontext, PgXmlErrorContext *errcxt,
135
            int sqlcode, const char *msg);
136
static void xml_errorHandler(void *data, PgXmlErrorPtr error);
137
static int  errdetail_for_xml_code(int code);
138
static void chopStringInfoNewlines(StringInfo str);
139
static void appendStringInfoLineSeparator(StringInfo str);
140
141
#ifdef USE_LIBXMLCONTEXT

/*
 * Debug-only support for routing libxml's allocations through palloc;
 * see the notes on memory management near the top of this file.
 */
static MemoryContext LibxmlContext = NULL;

static void xml_memory_init(void);

/* allocator callbacks */
static void *xml_palloc(size_t size);
static void *xml_repalloc(void *ptr, size_t size);
static void xml_pfree(void *ptr);
static char *xml_pstrdup(const char *string);

#endif              /* USE_LIBXMLCONTEXT */
151
152
static xmlChar *xml_text2xmlChar(text *in);
153
static int  parse_xml_decl(const xmlChar *str, size_t *lenp,
154
               xmlChar **version, xmlChar **encoding, int *standalone);
155
static bool print_xml_decl(StringInfo buf, const xmlChar *version,
156
               pg_enc encoding, int standalone);
157
static bool xml_doctype_in_content(const xmlChar *str);
158
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
159
               bool preserve_whitespace, int encoding,
160
               XmlOptionType *parsed_xmloptiontype,
161
               xmlNodePtr *parsed_nodes,
162
               Node *escontext);
163
static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
164
static int  xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
165
                   ArrayBuildState *astate,
166
                   PgXmlErrorContext *xmlerrcxt);
167
static xmlChar *pg_xmlCharStrndup(const char *str, size_t len);
168
#endif              /* USE_LIBXML */
169
170
static void xmldata_root_element_start(StringInfo result, const char *eltname,
171
                     const char *xmlschema, const char *targetns,
172
                     bool top_level);
173
static void xmldata_root_element_end(StringInfo result, const char *eltname);
174
static StringInfo query_to_xml_internal(const char *query, char *tablename,
175
                    const char *xmlschema, bool nulls, bool tableforest,
176
                    const char *targetns, bool top_level);
177
static const char *map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid,
178
                        bool nulls, bool tableforest, const char *targetns);
179
static const char *map_sql_schema_to_xmlschema_types(Oid nspid,
180
                           List *relid_list, bool nulls,
181
                           bool tableforest, const char *targetns);
182
static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list,
183
                            bool nulls, bool tableforest,
184
                            const char *targetns);
185
static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod);
186
static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list);
187
static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod);
188
static void SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result,
189
                    char *tablename, bool nulls, bool tableforest,
190
                    const char *targetns, bool top_level);
191
192
/*
 * XMLTABLE support
 */
#ifdef USE_LIBXML

/* Arbitrary magic value stored in every XmlTableBuilderData to identify it */
#define XMLTABLE_CONTEXT_MAGIC  46922182

/* Working state of the XMLTABLE builder (libxml builds only) */
typedef struct XmlTableBuilderData
{
  int     magic;          /* see XMLTABLE_CONTEXT_MAGIC */
  int     natts;
  long int  row_count;
  PgXmlErrorContext *xmlerrcxt;

  /* libxml parser and document handles */
  xmlParserCtxtPtr ctxt;
  xmlDocPtr doc;

  /* libxml XPath handles */
  xmlXPathContextPtr xpathcxt;
  xmlXPathCompExprPtr xpathcomp;
  xmlXPathObjectPtr xpathobj;
  xmlXPathCompExprPtr *xpathscomp;
} XmlTableBuilderData;

#endif
210
211
static void XmlTableInitOpaque(struct TableFuncScanState *state, int natts);
212
static void XmlTableSetDocument(struct TableFuncScanState *state, Datum value);
213
static void XmlTableSetNamespace(struct TableFuncScanState *state, const char *name,
214
                 const char *uri);
215
static void XmlTableSetRowFilter(struct TableFuncScanState *state, const char *path);
216
static void XmlTableSetColumnFilter(struct TableFuncScanState *state,
217
                  const char *path, int colnum);
218
static bool XmlTableFetchRow(struct TableFuncScanState *state);
219
static Datum XmlTableGetValue(struct TableFuncScanState *state, int colnum,
220
                Oid typid, int32 typmod, bool *isnull);
221
static void XmlTableDestroyOpaque(struct TableFuncScanState *state);
222
223
const TableFuncRoutine XmlTableRoutine =
224
{
225
  .InitOpaque = XmlTableInitOpaque,
226
  .SetDocument = XmlTableSetDocument,
227
  .SetNamespace = XmlTableSetNamespace,
228
  .SetRowFilter = XmlTableSetRowFilter,
229
  .SetColumnFilter = XmlTableSetColumnFilter,
230
  .FetchRow = XmlTableFetchRow,
231
  .GetValue = XmlTableGetValue,
232
  .DestroyOpaque = XmlTableDestroyOpaque
233
};
234
235
/*
 * Report that the requested feature is unavailable.  Used in the
 * non-USE_LIBXML branches of the functions in this file.
 */
#define NO_XML_SUPPORT() \
  ereport(ERROR, \
      errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
      errmsg("unsupported XML feature"), \
      errdetail("This functionality requires the server to be built with libxml support."))
240
241
242
/*
 * Namespace URIs, taken from SQL/XML:2008 section 4.9.
 */
#define NAMESPACE_XSD     "http://www.w3.org/2001/XMLSchema"
#define NAMESPACE_XSI     "http://www.w3.org/2001/XMLSchema-instance"
#define NAMESPACE_SQLXML  "http://standards.iso.org/iso/9075/2003/sqlxml"
246
247
248
#ifdef USE_LIBXML

/*
 * Look up an encoding by name (given as a libxml string) with
 * pg_char_to_encoding().  A negative lookup result is reported as an
 * ERROR; otherwise the lookup result is returned.
 */
static int
xmlChar_to_encoding(const xmlChar *encoding_name)
{
  const char *name = (const char *) encoding_name;
  int     enc = pg_char_to_encoding(name);

  if (enc >= 0)
    return enc;

  ereport(ERROR,
      (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
       errmsg("invalid encoding name \"%s\"", name)));

  return enc;         /* not reached; keeps the compiler quiet */
}
#endif
263
264
265
/*
266
 * xml_in uses a plain C string to VARDATA conversion, so for the time being
267
 * we use the conversion function for the text datatype.
268
 *
269
 * This is only acceptable so long as xmltype and text use the same
270
 * representation.
271
 */
272
Datum
273
xml_in(PG_FUNCTION_ARGS)
274
0
{
275
#ifdef USE_LIBXML
276
  char     *s = PG_GETARG_CSTRING(0);
277
  xmltype    *vardata;
278
  xmlDocPtr doc;
279
280
  /* Build the result object. */
281
  vardata = (xmltype *) cstring_to_text(s);
282
283
  /*
284
   * Parse the data to check if it is well-formed XML data.
285
   *
286
   * Note: we don't need to worry about whether a soft error is detected.
287
   */
288
  doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding(),
289
          NULL, NULL, fcinfo->context);
290
  if (doc != NULL)
291
    xmlFreeDoc(doc);
292
293
  PG_RETURN_XML_P(vardata);
294
#else
295
0
  NO_XML_SUPPORT();
296
0
  return 0;
297
0
#endif
298
0
}
299
300
301
#define PG_XML_DEFAULT_VERSION "1.0"
302
303
304
/*
305
 * xml_out_internal uses a plain VARDATA to C string conversion, so for the
306
 * time being we use the conversion function for the text datatype.
307
 *
308
 * This is only acceptable so long as xmltype and text use the same
309
 * representation.
310
 */
311
static char *
312
xml_out_internal(xmltype *x, pg_enc target_encoding)
313
0
{
314
0
  char     *str = text_to_cstring((text *) x);
315
316
#ifdef USE_LIBXML
317
  size_t    len = strlen(str);
318
  xmlChar    *version;
319
  int     standalone;
320
  int     res_code;
321
322
  if ((res_code = parse_xml_decl((xmlChar *) str,
323
                   &len, &version, NULL, &standalone)) == 0)
324
  {
325
    StringInfoData buf;
326
327
    initStringInfo(&buf);
328
329
    if (!print_xml_decl(&buf, version, target_encoding, standalone))
330
    {
331
      /*
332
       * If we are not going to produce an XML declaration, eat a single
333
       * newline in the original string to prevent empty first lines in
334
       * the output.
335
       */
336
      if (*(str + len) == '\n')
337
        len += 1;
338
    }
339
    appendStringInfoString(&buf, str + len);
340
341
    pfree(str);
342
343
    return buf.data;
344
  }
345
346
  ereport(WARNING,
347
      errcode(ERRCODE_DATA_CORRUPTED),
348
      errmsg_internal("could not parse XML declaration in stored value"),
349
      errdetail_for_xml_code(res_code));
350
#endif
351
0
  return str;
352
0
}
353
354
355
Datum
356
xml_out(PG_FUNCTION_ARGS)
357
0
{
358
0
  xmltype    *x = PG_GETARG_XML_P(0);
359
360
  /*
361
   * xml_out removes the encoding property in all cases.  This is because we
362
   * cannot control from here whether the datum will be converted to a
363
   * different client encoding, so we'd do more harm than good by including
364
   * it.
365
   */
366
0
  PG_RETURN_CSTRING(xml_out_internal(x, 0));
367
0
}
368
369
370
Datum
371
xml_recv(PG_FUNCTION_ARGS)
372
0
{
373
#ifdef USE_LIBXML
374
  StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
375
  xmltype    *result;
376
  char     *str;
377
  char     *newstr;
378
  int     nbytes;
379
  xmlDocPtr doc;
380
  xmlChar    *encodingStr = NULL;
381
  int     encoding;
382
383
  /*
384
   * Read the data in raw format. We don't know yet what the encoding is, as
385
   * that information is embedded in the xml declaration; so we have to
386
   * parse that before converting to server encoding.
387
   */
388
  nbytes = buf->len - buf->cursor;
389
  str = (char *) pq_getmsgbytes(buf, nbytes);
390
391
  /*
392
   * We need a null-terminated string to pass to parse_xml_decl().  Rather
393
   * than make a separate copy, make the temporary result one byte bigger
394
   * than it needs to be.
395
   */
396
  result = palloc(nbytes + 1 + VARHDRSZ);
397
  SET_VARSIZE(result, nbytes + VARHDRSZ);
398
  memcpy(VARDATA(result), str, nbytes);
399
  str = VARDATA(result);
400
  str[nbytes] = '\0';
401
402
  parse_xml_decl((const xmlChar *) str, NULL, NULL, &encodingStr, NULL);
403
404
  /*
405
   * If encoding wasn't explicitly specified in the XML header, treat it as
406
   * UTF-8, as that's the default in XML. This is different from xml_in(),
407
   * where the input has to go through the normal client to server encoding
408
   * conversion.
409
   */
410
  encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
411
412
  /*
413
   * Parse the data to check if it is well-formed XML data.  Assume that
414
   * xml_parse will throw ERROR if not.
415
   */
416
  doc = xml_parse(result, xmloption, true, encoding, NULL, NULL, NULL);
417
  xmlFreeDoc(doc);
418
419
  /* Now that we know what we're dealing with, convert to server encoding */
420
  newstr = pg_any_to_server(str, nbytes, encoding);
421
422
  if (newstr != str)
423
  {
424
    pfree(result);
425
    result = (xmltype *) cstring_to_text(newstr);
426
    pfree(newstr);
427
  }
428
429
  PG_RETURN_XML_P(result);
430
#else
431
0
  NO_XML_SUPPORT();
432
0
  return 0;
433
0
#endif
434
0
}
435
436
437
Datum
438
xml_send(PG_FUNCTION_ARGS)
439
0
{
440
0
  xmltype    *x = PG_GETARG_XML_P(0);
441
0
  char     *outval;
442
0
  StringInfoData buf;
443
444
  /*
445
   * xml_out_internal doesn't convert the encoding, it just prints the right
446
   * declaration. pq_sendtext will do the conversion.
447
   */
448
0
  outval = xml_out_internal(x, pg_get_client_encoding());
449
450
0
  pq_begintypsend(&buf);
451
0
  pq_sendtext(&buf, outval, strlen(outval));
452
0
  pfree(outval);
453
0
  PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
454
0
}
455
456
457
#ifdef USE_LIBXML
/*
 * Append the payload bytes of a text value to a StringInfo.
 */
static void
appendStringInfoText(StringInfo str, const text *t)
{
  const char *payload = VARDATA_ANY(t);
  int     payload_len = VARSIZE_ANY_EXHDR(t);

  appendBinaryStringInfo(str, payload, payload_len);
}
#endif
464
465
466
static xmltype *
467
stringinfo_to_xmltype(StringInfo buf)
468
0
{
469
0
  return (xmltype *) cstring_to_text_with_len(buf->data, buf->len);
470
0
}
471
472
473
static xmltype *
474
cstring_to_xmltype(const char *string)
475
0
{
476
0
  return (xmltype *) cstring_to_text(string);
477
0
}
478
479
480
#ifdef USE_LIBXML
/*
 * Build an xml value from the current contents of a libxml buffer.
 */
static xmltype *
xmlBuffer_to_xmltype(xmlBufferPtr buf)
{
  const char *content = (const char *) xmlBufferContent(buf);
  text     *converted;

  converted = cstring_to_text_with_len(content, xmlBufferLength(buf));

  return (xmltype *) converted;
}
#endif
488
489
490
Datum
491
xmlcomment(PG_FUNCTION_ARGS)
492
0
{
493
#ifdef USE_LIBXML
494
  text     *arg = PG_GETARG_TEXT_PP(0);
495
  char     *argdata = VARDATA_ANY(arg);
496
  int     len = VARSIZE_ANY_EXHDR(arg);
497
  StringInfoData buf;
498
  int     i;
499
500
  /* check for "--" in string or "-" at the end */
501
  for (i = 1; i < len; i++)
502
  {
503
    if (argdata[i] == '-' && argdata[i - 1] == '-')
504
      ereport(ERROR,
505
          (errcode(ERRCODE_INVALID_XML_COMMENT),
506
           errmsg("invalid XML comment")));
507
  }
508
  if (len > 0 && argdata[len - 1] == '-')
509
    ereport(ERROR,
510
        (errcode(ERRCODE_INVALID_XML_COMMENT),
511
         errmsg("invalid XML comment")));
512
513
  initStringInfo(&buf);
514
  appendStringInfoString(&buf, "<!--");
515
  appendStringInfoText(&buf, arg);
516
  appendStringInfoString(&buf, "-->");
517
518
  PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
519
#else
520
0
  NO_XML_SUPPORT();
521
0
  return 0;
522
0
#endif
523
0
}
524
525
526
Datum
527
xmltext(PG_FUNCTION_ARGS)
528
0
{
529
#ifdef USE_LIBXML
530
  text     *arg = PG_GETARG_TEXT_PP(0);
531
  text     *result;
532
  volatile xmlChar *xmlbuf = NULL;
533
  PgXmlErrorContext *xmlerrcxt;
534
535
  /* First we gotta spin up some error handling. */
536
  xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
537
538
  PG_TRY();
539
  {
540
    xmlbuf = xmlEncodeSpecialChars(NULL, xml_text2xmlChar(arg));
541
542
    if (xmlbuf == NULL || xmlerrcxt->err_occurred)
543
      xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
544
            "could not allocate xmlChar");
545
546
    result = cstring_to_text_with_len((const char *) xmlbuf,
547
                      xmlStrlen((const xmlChar *) xmlbuf));
548
  }
549
  PG_CATCH();
550
  {
551
    if (xmlbuf)
552
      xmlFree((xmlChar *) xmlbuf);
553
554
    pg_xml_done(xmlerrcxt, true);
555
    PG_RE_THROW();
556
  }
557
  PG_END_TRY();
558
559
  xmlFree((xmlChar *) xmlbuf);
560
  pg_xml_done(xmlerrcxt, false);
561
562
  PG_RETURN_XML_P(result);
563
#else
564
0
  NO_XML_SUPPORT();
565
0
  return 0;
566
0
#endif              /* not USE_LIBXML */
567
0
}
568
569
570
/*
571
 * TODO: xmlconcat needs to merge the notations and unparsed entities
572
 * of the argument values.  Not very important in practice, though.
573
 */
574
xmltype *
575
xmlconcat(List *args)
576
0
{
577
#ifdef USE_LIBXML
578
  int     global_standalone = 1;
579
  xmlChar    *global_version = NULL;
580
  bool    global_version_no_value = false;
581
  StringInfoData buf;
582
  ListCell   *v;
583
584
  initStringInfo(&buf);
585
  foreach(v, args)
586
  {
587
    xmltype    *x = DatumGetXmlP(PointerGetDatum(lfirst(v)));
588
    size_t    len;
589
    xmlChar    *version;
590
    int     standalone;
591
    char     *str;
592
593
    len = VARSIZE(x) - VARHDRSZ;
594
    str = text_to_cstring((text *) x);
595
596
    parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone);
597
598
    if (standalone == 0 && global_standalone == 1)
599
      global_standalone = 0;
600
    if (standalone < 0)
601
      global_standalone = -1;
602
603
    if (!version)
604
      global_version_no_value = true;
605
    else if (!global_version)
606
      global_version = version;
607
    else if (xmlStrcmp(version, global_version) != 0)
608
      global_version_no_value = true;
609
610
    appendStringInfoString(&buf, str + len);
611
    pfree(str);
612
  }
613
614
  if (!global_version_no_value || global_standalone >= 0)
615
  {
616
    StringInfoData buf2;
617
618
    initStringInfo(&buf2);
619
620
    print_xml_decl(&buf2,
621
             (!global_version_no_value) ? global_version : NULL,
622
             0,
623
             global_standalone);
624
625
    appendBinaryStringInfo(&buf2, buf.data, buf.len);
626
    buf = buf2;
627
  }
628
629
  return stringinfo_to_xmltype(&buf);
630
#else
631
0
  NO_XML_SUPPORT();
632
0
  return NULL;
633
0
#endif
634
0
}
635
636
637
/*
638
 * XMLAGG support
639
 */
640
Datum
641
xmlconcat2(PG_FUNCTION_ARGS)
642
0
{
643
0
  if (PG_ARGISNULL(0))
644
0
  {
645
0
    if (PG_ARGISNULL(1))
646
0
      PG_RETURN_NULL();
647
0
    else
648
0
      PG_RETURN_XML_P(PG_GETARG_XML_P(1));
649
0
  }
650
0
  else if (PG_ARGISNULL(1))
651
0
    PG_RETURN_XML_P(PG_GETARG_XML_P(0));
652
0
  else
653
0
    PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0),
654
0
                       PG_GETARG_XML_P(1))));
655
0
}
656
657
658
Datum
659
texttoxml(PG_FUNCTION_ARGS)
660
0
{
661
0
  text     *data = PG_GETARG_TEXT_PP(0);
662
663
0
  PG_RETURN_XML_P(xmlparse(data, xmloption, true));
664
0
}
665
666
667
Datum
668
xmltotext(PG_FUNCTION_ARGS)
669
0
{
670
0
  xmltype    *data = PG_GETARG_XML_P(0);
671
672
  /* It's actually binary compatible. */
673
0
  PG_RETURN_TEXT_P((text *) data);
674
0
}
675
676
677
text *
678
xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
679
0
{
680
#ifdef USE_LIBXML
681
  text     *volatile result;
682
  xmlDocPtr doc;
683
  XmlOptionType parsed_xmloptiontype;
684
  xmlNodePtr  content_nodes;
685
  volatile xmlBufferPtr buf = NULL;
686
  volatile xmlSaveCtxtPtr ctxt = NULL;
687
  ErrorSaveContext escontext = {T_ErrorSaveContext};
688
  PgXmlErrorContext *volatile xmlerrcxt = NULL;
689
#endif
690
691
0
  if (xmloption_arg != XMLOPTION_DOCUMENT && !indent)
692
0
  {
693
    /*
694
     * We don't actually need to do anything, so just return the
695
     * binary-compatible input.  For backwards-compatibility reasons,
696
     * allow such cases to succeed even without USE_LIBXML.
697
     */
698
0
    return (text *) data;
699
0
  }
700
701
#ifdef USE_LIBXML
702
703
  /*
704
   * Parse the input according to the xmloption.
705
   *
706
   * preserve_whitespace is set to false in case we are indenting, otherwise
707
   * libxml2 will fail to indent elements that have whitespace between them.
708
   */
709
  doc = xml_parse(data, xmloption_arg, !indent, GetDatabaseEncoding(),
710
          &parsed_xmloptiontype, &content_nodes,
711
          (Node *) &escontext);
712
  if (doc == NULL || escontext.error_occurred)
713
  {
714
    if (doc)
715
      xmlFreeDoc(doc);
716
    /* A soft error must be failure to conform to XMLOPTION_DOCUMENT */
717
    ereport(ERROR,
718
        (errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
719
         errmsg("not an XML document")));
720
  }
721
722
  /* If we weren't asked to indent, we're done. */
723
  if (!indent)
724
  {
725
    xmlFreeDoc(doc);
726
    return (text *) data;
727
  }
728
729
  /*
730
   * Otherwise, we gotta spin up some error handling.  Unlike most other
731
   * routines in this module, we already have a libxml "doc" structure to
732
   * free, so we need to call pg_xml_init() inside the PG_TRY and be
733
   * prepared for it to fail (typically due to palloc OOM).
734
   */
735
  PG_TRY();
736
  {
737
    size_t    decl_len = 0;
738
739
    xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
740
741
    /* The serialized data will go into this buffer. */
742
    buf = xmlBufferCreate();
743
744
    if (buf == NULL || xmlerrcxt->err_occurred)
745
      xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
746
            "could not allocate xmlBuffer");
747
748
    /* Detect whether there's an XML declaration */
749
    parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
750
751
    /*
752
     * Emit declaration only if the input had one.  Note: some versions of
753
     * xmlSaveToBuffer leak memory if a non-null encoding argument is
754
     * passed, so don't do that.  We don't want any encoding conversion
755
     * anyway.
756
     */
757
    if (decl_len == 0)
758
      ctxt = xmlSaveToBuffer(buf, NULL,
759
                   XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
760
    else
761
      ctxt = xmlSaveToBuffer(buf, NULL,
762
                   XML_SAVE_FORMAT);
763
764
    if (ctxt == NULL || xmlerrcxt->err_occurred)
765
      xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
766
            "could not allocate xmlSaveCtxt");
767
768
    if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
769
    {
770
      /* If it's a document, saving is easy. */
771
      if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
772
        xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
773
              "could not save document to xmlBuffer");
774
    }
775
    else if (content_nodes != NULL)
776
    {
777
      /*
778
       * Deal with the case where we have non-singly-rooted XML.
779
       * libxml's dump functions don't work well for that without help.
780
       * We build a fake root node that serves as a container for the
781
       * content nodes, and then iterate over the nodes.
782
       */
783
      xmlNodePtr  root;
784
      xmlNodePtr  oldroot;
785
      xmlNodePtr  newline;
786
787
      root = xmlNewNode(NULL, (const xmlChar *) "content-root");
788
      if (root == NULL || xmlerrcxt->err_occurred)
789
        xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
790
              "could not allocate xml node");
791
792
      /*
793
       * This attaches root to doc, so we need not free it separately...
794
       * but instead, we have to free the old root if there was one.
795
       */
796
      oldroot = xmlDocSetRootElement(doc, root);
797
      if (oldroot != NULL)
798
        xmlFreeNode(oldroot);
799
800
      if (xmlAddChildList(root, content_nodes) == NULL ||
801
        xmlerrcxt->err_occurred)
802
        xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
803
              "could not append xml node list");
804
805
      /*
806
       * We use this node to insert newlines in the dump.  Note: in at
807
       * least some libxml versions, xmlNewDocText would not attach the
808
       * node to the document even if we passed it.  Therefore, manage
809
       * freeing of this node manually, and pass NULL here to make sure
810
       * there's not a dangling link.
811
       */
812
      newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
813
      if (newline == NULL || xmlerrcxt->err_occurred)
814
        xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
815
              "could not allocate xml node");
816
817
      for (xmlNodePtr node = root->children; node; node = node->next)
818
      {
819
        /* insert newlines between nodes */
820
        if (node->type != XML_TEXT_NODE && node->prev != NULL)
821
        {
822
          if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
823
          {
824
            xmlFreeNode(newline);
825
            xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
826
                  "could not save newline to xmlBuffer");
827
          }
828
        }
829
830
        if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
831
        {
832
          xmlFreeNode(newline);
833
          xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
834
                "could not save content to xmlBuffer");
835
        }
836
      }
837
838
      xmlFreeNode(newline);
839
    }
840
841
    if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
842
    {
843
      ctxt = NULL;    /* don't try to close it again */
844
      xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
845
            "could not close xmlSaveCtxtPtr");
846
    }
847
848
    /*
849
     * xmlDocContentDumpOutput may add a trailing newline, so remove that.
850
     */
851
    if (xmloption_arg == XMLOPTION_DOCUMENT)
852
    {
853
      const char *str = (const char *) xmlBufferContent(buf);
854
      int     len = xmlBufferLength(buf);
855
856
      while (len > 0 && (str[len - 1] == '\n' ||
857
                 str[len - 1] == '\r'))
858
        len--;
859
860
      result = cstring_to_text_with_len(str, len);
861
    }
862
    else
863
      result = (text *) xmlBuffer_to_xmltype(buf);
864
  }
865
  PG_CATCH();
866
  {
867
    if (ctxt)
868
      xmlSaveClose(ctxt);
869
    if (buf)
870
      xmlBufferFree(buf);
871
    xmlFreeDoc(doc);
872
873
    if (xmlerrcxt)
874
      pg_xml_done(xmlerrcxt, true);
875
876
    PG_RE_THROW();
877
  }
878
  PG_END_TRY();
879
880
  xmlBufferFree(buf);
881
  xmlFreeDoc(doc);
882
883
  pg_xml_done(xmlerrcxt, false);
884
885
  return result;
886
#else
887
0
  NO_XML_SUPPORT();
888
0
  return NULL;
889
0
#endif
890
0
}
891
892
893
xmltype *
894
xmlelement(XmlExpr *xexpr,
895
       Datum *named_argvalue, bool *named_argnull,
896
       Datum *argvalue, bool *argnull)
897
0
{
898
#ifdef USE_LIBXML
899
  xmltype    *result;
900
  List     *named_arg_strings;
901
  List     *arg_strings;
902
  int     i;
903
  ListCell   *arg;
904
  ListCell   *narg;
905
  PgXmlErrorContext *xmlerrcxt;
906
  volatile xmlBufferPtr buf = NULL;
907
  volatile xmlTextWriterPtr writer = NULL;
908
909
  /*
910
   * All arguments are already evaluated, and their values are passed in the
911
   * named_argvalue/named_argnull or argvalue/argnull arrays.  This avoids
912
   * issues if one of the arguments involves a call to some other function
913
   * or subsystem that wants to use libxml on its own terms.  We examine the
914
   * original XmlExpr to identify the numbers and types of the arguments.
915
   */
916
  named_arg_strings = NIL;
917
  i = 0;
918
  foreach(arg, xexpr->named_args)
919
  {
920
    Expr     *e = (Expr *) lfirst(arg);
921
    char     *str;
922
923
    if (named_argnull[i])
924
      str = NULL;
925
    else
926
      str = map_sql_value_to_xml_value(named_argvalue[i],
927
                       exprType((Node *) e),
928
                       false);
929
    named_arg_strings = lappend(named_arg_strings, str);
930
    i++;
931
  }
932
933
  arg_strings = NIL;
934
  i = 0;
935
  foreach(arg, xexpr->args)
936
  {
937
    Expr     *e = (Expr *) lfirst(arg);
938
    char     *str;
939
940
    /* here we can just forget NULL elements immediately */
941
    if (!argnull[i])
942
    {
943
      str = map_sql_value_to_xml_value(argvalue[i],
944
                       exprType((Node *) e),
945
                       true);
946
      arg_strings = lappend(arg_strings, str);
947
    }
948
    i++;
949
  }
950
951
  xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
952
953
  PG_TRY();
954
  {
955
    buf = xmlBufferCreate();
956
    if (buf == NULL || xmlerrcxt->err_occurred)
957
      xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
958
            "could not allocate xmlBuffer");
959
    writer = xmlNewTextWriterMemory(buf, 0);
960
    if (writer == NULL || xmlerrcxt->err_occurred)
961
      xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
962
            "could not allocate xmlTextWriter");
963
964
    if (xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name) < 0 ||
965
      xmlerrcxt->err_occurred)
966
      xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
967
            "could not start xml element");
968
969
    forboth(arg, named_arg_strings, narg, xexpr->arg_names)
970
    {
971
      char     *str = (char *) lfirst(arg);
972
      char     *argname = strVal(lfirst(narg));
973
974
      if (str)
975
      {
976
        if (xmlTextWriterWriteAttribute(writer,
977
                        (xmlChar *) argname,
978
                        (xmlChar *) str) < 0 ||
979
          xmlerrcxt->err_occurred)
980
          xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
981
                "could not write xml attribute");
982
      }
983
    }
984
985
    foreach(arg, arg_strings)
986
    {
987
      char     *str = (char *) lfirst(arg);
988
989
      if (xmlTextWriterWriteRaw(writer, (xmlChar *) str) < 0 ||
990
        xmlerrcxt->err_occurred)
991
        xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
992
              "could not write raw xml text");
993
    }
994
995
    if (xmlTextWriterEndElement(writer) < 0 ||
996
      xmlerrcxt->err_occurred)
997
      xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
998
            "could not end xml element");
999
1000
    /* we MUST do this now to flush data out to the buffer ... */
1001
    xmlFreeTextWriter(writer);
1002
    writer = NULL;
1003
1004
    result = xmlBuffer_to_xmltype(buf);
1005
  }
1006
  PG_CATCH();
1007
  {
1008
    if (writer)
1009
      xmlFreeTextWriter(writer);
1010
    if (buf)
1011
      xmlBufferFree(buf);
1012
1013
    pg_xml_done(xmlerrcxt, true);
1014
1015
    PG_RE_THROW();
1016
  }
1017
  PG_END_TRY();
1018
1019
  xmlBufferFree(buf);
1020
1021
  pg_xml_done(xmlerrcxt, false);
1022
1023
  return result;
1024
#else
1025
0
  NO_XML_SUPPORT();
1026
0
  return NULL;
1027
0
#endif
1028
0
}
1029
1030
1031
xmltype *
1032
xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
1033
0
{
1034
#ifdef USE_LIBXML
1035
  xmlDocPtr doc;
1036
1037
  doc = xml_parse(data, xmloption_arg, preserve_whitespace,
1038
          GetDatabaseEncoding(), NULL, NULL, NULL);
1039
  xmlFreeDoc(doc);
1040
1041
  return (xmltype *) data;
1042
#else
1043
0
  NO_XML_SUPPORT();
1044
0
  return NULL;
1045
0
#endif
1046
0
}
1047
1048
1049
xmltype *
1050
xmlpi(const char *target, text *arg, bool arg_is_null, bool *result_is_null)
1051
0
{
1052
#ifdef USE_LIBXML
1053
  xmltype    *result;
1054
  StringInfoData buf;
1055
1056
  if (pg_strcasecmp(target, "xml") == 0)
1057
    ereport(ERROR,
1058
        (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
1059
         errmsg("invalid XML processing instruction"),
1060
         errdetail("XML processing instruction target name cannot be \"%s\".", target)));
1061
1062
  /*
1063
   * Following the SQL standard, the null check comes after the syntax check
1064
   * above.
1065
   */
1066
  *result_is_null = arg_is_null;
1067
  if (*result_is_null)
1068
    return NULL;
1069
1070
  initStringInfo(&buf);
1071
1072
  appendStringInfo(&buf, "<?%s", target);
1073
1074
  if (arg != NULL)
1075
  {
1076
    char     *string;
1077
1078
    string = text_to_cstring(arg);
1079
    if (strstr(string, "?>") != NULL)
1080
      ereport(ERROR,
1081
          (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
1082
           errmsg("invalid XML processing instruction"),
1083
           errdetail("XML processing instruction cannot contain \"?>\".")));
1084
1085
    appendStringInfoChar(&buf, ' ');
1086
    appendStringInfoString(&buf, string + strspn(string, " "));
1087
    pfree(string);
1088
  }
1089
  appendStringInfoString(&buf, "?>");
1090
1091
  result = stringinfo_to_xmltype(&buf);
1092
  pfree(buf.data);
1093
  return result;
1094
#else
1095
0
  NO_XML_SUPPORT();
1096
0
  return NULL;
1097
0
#endif
1098
0
}
1099
1100
1101
xmltype *
1102
xmlroot(xmltype *data, text *version, int standalone)
1103
0
{
1104
#ifdef USE_LIBXML
1105
  char     *str;
1106
  size_t    len;
1107
  xmlChar    *orig_version;
1108
  int     orig_standalone;
1109
  StringInfoData buf;
1110
1111
  len = VARSIZE(data) - VARHDRSZ;
1112
  str = text_to_cstring((text *) data);
1113
1114
  parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone);
1115
1116
  if (version)
1117
    orig_version = xml_text2xmlChar(version);
1118
  else
1119
    orig_version = NULL;
1120
1121
  switch (standalone)
1122
  {
1123
    case XML_STANDALONE_YES:
1124
      orig_standalone = 1;
1125
      break;
1126
    case XML_STANDALONE_NO:
1127
      orig_standalone = 0;
1128
      break;
1129
    case XML_STANDALONE_NO_VALUE:
1130
      orig_standalone = -1;
1131
      break;
1132
    case XML_STANDALONE_OMITTED:
1133
      /* leave original value */
1134
      break;
1135
  }
1136
1137
  initStringInfo(&buf);
1138
  print_xml_decl(&buf, orig_version, 0, orig_standalone);
1139
  appendStringInfoString(&buf, str + len);
1140
1141
  return stringinfo_to_xmltype(&buf);
1142
#else
1143
0
  NO_XML_SUPPORT();
1144
0
  return NULL;
1145
0
#endif
1146
0
}
1147
1148
1149
/*
1150
 * Validate document (given as string) against DTD (given as external link)
1151
 *
1152
 * This has been removed because it is a security hole: unprivileged users
1153
 * should not be able to use Postgres to fetch arbitrary external files,
1154
 * which unfortunately is exactly what libxml is willing to do with the DTD
1155
 * parameter.
1156
 */
1157
Datum
1158
xmlvalidate(PG_FUNCTION_ARGS)
1159
0
{
1160
0
  ereport(ERROR,
1161
0
      (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1162
0
       errmsg("xmlvalidate is not implemented")));
1163
0
  return 0;
1164
0
}
1165
1166
1167
bool
1168
xml_is_document(xmltype *arg)
1169
0
{
1170
#ifdef USE_LIBXML
1171
  xmlDocPtr doc;
1172
  ErrorSaveContext escontext = {T_ErrorSaveContext};
1173
1174
  /*
1175
   * We'll report "true" if no soft error is reported by xml_parse().
1176
   */
1177
  doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
1178
          GetDatabaseEncoding(), NULL, NULL, (Node *) &escontext);
1179
  if (doc)
1180
    xmlFreeDoc(doc);
1181
1182
  return !escontext.error_occurred;
1183
#else             /* not USE_LIBXML */
1184
0
  NO_XML_SUPPORT();
1185
0
  return false;
1186
0
#endif              /* not USE_LIBXML */
1187
0
}
1188
1189
1190
#ifdef USE_LIBXML
1191
1192
/*
1193
 * pg_xml_init_library --- set up for use of libxml
1194
 *
1195
 * This should be called by each function that is about to use libxml
1196
 * facilities but doesn't require error handling.  It initializes libxml
1197
 * and verifies compatibility with the loaded libxml version.  These are
1198
 * once-per-session activities.
1199
 *
1200
 * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and
1201
 * check)
1202
 */
1203
void
1204
pg_xml_init_library(void)
1205
{
1206
  static bool first_time = true;
1207
1208
  if (first_time)
1209
  {
1210
    /* Stuff we need do only once per session */
1211
1212
    /*
1213
     * Currently, we have no pure UTF-8 support for internals -- check if
1214
     * we can work.
1215
     */
1216
    if (sizeof(char) != sizeof(xmlChar))
1217
      ereport(ERROR,
1218
          (errmsg("could not initialize XML library"),
1219
           errdetail("libxml2 has incompatible char type: sizeof(char)=%zu, sizeof(xmlChar)=%zu.",
1220
                 sizeof(char), sizeof(xmlChar))));
1221
1222
#ifdef USE_LIBXMLCONTEXT
1223
    /* Set up libxml's memory allocation our way */
1224
    xml_memory_init();
1225
#endif
1226
1227
    /* Check library compatibility */
1228
    LIBXML_TEST_VERSION;
1229
1230
    first_time = false;
1231
  }
1232
}
1233
1234
/*
1235
 * pg_xml_init --- set up for use of libxml and register an error handler
1236
 *
1237
 * This should be called by each function that is about to use libxml
1238
 * facilities and requires error handling.  It initializes libxml with
1239
 * pg_xml_init_library() and establishes our libxml error handler.
1240
 *
1241
 * strictness determines which errors are reported and which are ignored.
1242
 *
1243
 * Calls to this function MUST be followed by a PG_TRY block that guarantees
1244
 * that pg_xml_done() is called during either normal or error exit.
1245
 *
1246
 * This is exported for use by contrib/xml2, as well as other code that might
1247
 * wish to share use of this module's libxml error handler.
1248
 */
1249
PgXmlErrorContext *
1250
pg_xml_init(PgXmlStrictness strictness)
1251
{
1252
  PgXmlErrorContext *errcxt;
1253
  void     *new_errcxt;
1254
1255
  /* Do one-time setup if needed */
1256
  pg_xml_init_library();
1257
1258
  /* Create error handling context structure */
1259
  errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext));
1260
  errcxt->magic = ERRCXT_MAGIC;
1261
  errcxt->strictness = strictness;
1262
  errcxt->err_occurred = false;
1263
  initStringInfo(&errcxt->err_buf);
1264
1265
  /*
1266
   * Save original error handler and install ours. libxml originally didn't
1267
   * distinguish between the contexts for generic and for structured error
1268
   * handlers.  If we're using an old libxml version, we must thus save the
1269
   * generic error context, even though we're using a structured error
1270
   * handler.
1271
   */
1272
  errcxt->saved_errfunc = xmlStructuredError;
1273
1274
#ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1275
  errcxt->saved_errcxt = xmlStructuredErrorContext;
1276
#else
1277
  errcxt->saved_errcxt = xmlGenericErrorContext;
1278
#endif
1279
1280
  xmlSetStructuredErrorFunc(errcxt, xml_errorHandler);
1281
1282
  /*
1283
   * Verify that xmlSetStructuredErrorFunc set the context variable we
1284
   * expected it to.  If not, the error context pointer we just saved is not
1285
   * the correct thing to restore, and since that leaves us without a way to
1286
   * restore the context in pg_xml_done, we must fail.
1287
   *
1288
   * The only known situation in which this test fails is if we compile with
1289
   * headers from a libxml2 that doesn't track the structured error context
1290
   * separately (< 2.7.4), but at runtime use a version that does, or vice
1291
   * versa.  The libxml2 authors did not treat that change as constituting
1292
   * an ABI break, so the LIBXML_TEST_VERSION test in pg_xml_init_library
1293
   * fails to protect us from this.
1294
   */
1295
1296
#ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1297
  new_errcxt = xmlStructuredErrorContext;
1298
#else
1299
  new_errcxt = xmlGenericErrorContext;
1300
#endif
1301
1302
  if (new_errcxt != errcxt)
1303
    ereport(ERROR,
1304
        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1305
         errmsg("could not set up XML error handler"),
1306
         errhint("This probably indicates that the version of libxml2"
1307
             " being used is not compatible with the libxml2"
1308
             " header files that PostgreSQL was built with.")));
1309
1310
  /*
1311
   * Also, install an entity loader to prevent unwanted fetches of external
1312
   * files and URLs.
1313
   */
1314
  errcxt->saved_entityfunc = xmlGetExternalEntityLoader();
1315
  xmlSetExternalEntityLoader(xmlPgEntityLoader);
1316
1317
  return errcxt;
1318
}
1319
1320
1321
/*
1322
 * pg_xml_done --- restore previous libxml error handling
1323
 *
1324
 * Resets libxml's global error-handling state to what it was before
1325
 * pg_xml_init() was called.
1326
 *
1327
 * This routine verifies that all pending errors have been dealt with
1328
 * (in assert-enabled builds, anyway).
1329
 */
1330
void
1331
pg_xml_done(PgXmlErrorContext *errcxt, bool isError)
1332
{
1333
  void     *cur_errcxt;
1334
1335
  /* An assert seems like enough protection here */
1336
  Assert(errcxt->magic == ERRCXT_MAGIC);
1337
1338
  /*
1339
   * In a normal exit, there should be no un-handled libxml errors.  But we
1340
   * shouldn't try to enforce this during error recovery, since the longjmp
1341
   * could have been thrown before xml_ereport had a chance to run.
1342
   */
1343
  Assert(!errcxt->err_occurred || isError);
1344
1345
  /*
1346
   * Check that libxml's global state is correct, warn if not.  This is a
1347
   * real test and not an Assert because it has a higher probability of
1348
   * happening.
1349
   */
1350
#ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1351
  cur_errcxt = xmlStructuredErrorContext;
1352
#else
1353
  cur_errcxt = xmlGenericErrorContext;
1354
#endif
1355
1356
  if (cur_errcxt != errcxt)
1357
    elog(WARNING, "libxml error handling state is out of sync with xml.c");
1358
1359
  /* Restore the saved handlers */
1360
  xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc);
1361
  xmlSetExternalEntityLoader(errcxt->saved_entityfunc);
1362
1363
  /*
1364
   * Mark the struct as invalid, just in case somebody somehow manages to
1365
   * call xml_errorHandler or xml_ereport with it.
1366
   */
1367
  errcxt->magic = 0;
1368
1369
  /* Release memory */
1370
  pfree(errcxt->err_buf.data);
1371
  pfree(errcxt);
1372
}
1373
1374
1375
/*
1376
 * pg_xml_error_occurred() --- test the error flag
1377
 */
1378
bool
1379
pg_xml_error_occurred(PgXmlErrorContext *errcxt)
1380
{
1381
  return errcxt->err_occurred;
1382
}
1383
1384
1385
/*
1386
 * SQL/XML allows storing "XML documents" or "XML content".  "XML
1387
 * documents" are specified by the XML specification and are parsed
1388
 * easily by libxml.  "XML content" is specified by SQL/XML as the
1389
 * production "XMLDecl? content".  But libxml can only parse the
1390
 * "content" part, so we have to parse the XML declaration ourselves
1391
 * to complete this.
1392
 */
1393
1394
/*
 * Make the enclosing function return XML_ERR_SPACE_REQUIRED unless *p is an
 * XML whitespace character.  p is not advanced.
 */
#define CHECK_XML_SPACE(p) \
  do { \
    if (!xmlIsBlank_ch(*(p))) \
      return XML_ERR_SPACE_REQUIRED; \
  } while (0)

/*
 * Advance p past any run of XML whitespace.  Wrapped in do/while so that it
 * behaves as a single statement wherever it is used.
 */
#define SKIP_XML_SPACE(p) \
  do { \
    while (xmlIsBlank_ch(*(p))) \
      (p)++; \
  } while (0)

/* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
/* Beware of multiple evaluations of argument! */
#define PG_XMLISNAMECHAR(c) \
  (xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \
      || xmlIsDigit_ch(c) \
      || (c) == '.' || (c) == '-' || (c) == '_' || (c) == ':' \
      || xmlIsCombiningQ(c) \
      || xmlIsExtender_ch(c))
1411
1412
/* pnstrdup, but deal with xmlChar not char; len is measured in xmlChars */
1413
static xmlChar *
1414
xml_pnstrdup(const xmlChar *str, size_t len)
1415
{
1416
  xmlChar    *result;
1417
1418
  result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1419
  memcpy(result, str, len * sizeof(xmlChar));
1420
  result[len] = 0;
1421
  return result;
1422
}
1423
1424
/* Ditto, except input is char* */
1425
static xmlChar *
1426
pg_xmlCharStrndup(const char *str, size_t len)
1427
{
1428
  xmlChar    *result;
1429
1430
  result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1431
  memcpy(result, str, len);
1432
  result[len] = '\0';
1433
1434
  return result;
1435
}
1436
1437
/*
1438
 * Copy xmlChar string to PostgreSQL-owned memory, freeing the input.
1439
 *
1440
 * The input xmlChar is freed regardless of success of the copy.
1441
 */
1442
static char *
1443
xml_pstrdup_and_free(xmlChar *str)
1444
{
1445
  char     *result;
1446
1447
  if (str)
1448
  {
1449
    PG_TRY();
1450
    {
1451
      result = pstrdup((char *) str);
1452
    }
1453
    PG_FINALLY();
1454
    {
1455
      xmlFree(str);
1456
    }
1457
    PG_END_TRY();
1458
  }
1459
  else
1460
    result = NULL;
1461
1462
  return result;
1463
}
1464
1465
/*
1466
 * str is the null-terminated input string.  Remaining arguments are
1467
 * output arguments; each can be NULL if value is not wanted.
1468
 * version and encoding are returned as locally-palloc'd strings.
1469
 * Result is 0 if OK, an error code if not.
1470
 */
1471
static int
1472
parse_xml_decl(const xmlChar *str, size_t *lenp,
1473
         xmlChar **version, xmlChar **encoding, int *standalone)
1474
{
1475
  const xmlChar *p;
1476
  const xmlChar *save_p;
1477
  size_t    len;
1478
  int     utf8char;
1479
  int     utf8len;
1480
1481
  /*
1482
   * Only initialize libxml.  We don't need error handling here, but we do
1483
   * need to make sure libxml is initialized before calling any of its
1484
   * functions.  Note that this is safe (and a no-op) if caller has already
1485
   * done pg_xml_init().
1486
   */
1487
  pg_xml_init_library();
1488
1489
  /* Initialize output arguments to "not present" */
1490
  if (version)
1491
    *version = NULL;
1492
  if (encoding)
1493
    *encoding = NULL;
1494
  if (standalone)
1495
    *standalone = -1;
1496
1497
  p = str;
1498
1499
  if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0)
1500
    goto finished;
1501
1502
  /*
1503
   * If next char is a name char, it's a PI like <?xml-stylesheet ...?>
1504
   * rather than an XMLDecl, so we have done what we came to do and found no
1505
   * XMLDecl.
1506
   *
1507
   * We need an input length value for xmlGetUTF8Char, but there's no need
1508
   * to count the whole document size, so use strnlen not strlen.
1509
   */
1510
  utf8len = strnlen((const char *) (p + 5), MAX_MULTIBYTE_CHAR_LEN);
1511
  utf8char = xmlGetUTF8Char(p + 5, &utf8len);
1512
  if (PG_XMLISNAMECHAR(utf8char))
1513
    goto finished;
1514
1515
  p += 5;
1516
1517
  /* version */
1518
  CHECK_XML_SPACE(p);
1519
  SKIP_XML_SPACE(p);
1520
  if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0)
1521
    return XML_ERR_VERSION_MISSING;
1522
  p += 7;
1523
  SKIP_XML_SPACE(p);
1524
  if (*p != '=')
1525
    return XML_ERR_VERSION_MISSING;
1526
  p += 1;
1527
  SKIP_XML_SPACE(p);
1528
1529
  if (*p == '\'' || *p == '"')
1530
  {
1531
    const xmlChar *q;
1532
1533
    q = xmlStrchr(p + 1, *p);
1534
    if (!q)
1535
      return XML_ERR_VERSION_MISSING;
1536
1537
    if (version)
1538
      *version = xml_pnstrdup(p + 1, q - p - 1);
1539
    p = q + 1;
1540
  }
1541
  else
1542
    return XML_ERR_VERSION_MISSING;
1543
1544
  /* encoding */
1545
  save_p = p;
1546
  SKIP_XML_SPACE(p);
1547
  if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0)
1548
  {
1549
    CHECK_XML_SPACE(save_p);
1550
    p += 8;
1551
    SKIP_XML_SPACE(p);
1552
    if (*p != '=')
1553
      return XML_ERR_MISSING_ENCODING;
1554
    p += 1;
1555
    SKIP_XML_SPACE(p);
1556
1557
    if (*p == '\'' || *p == '"')
1558
    {
1559
      const xmlChar *q;
1560
1561
      q = xmlStrchr(p + 1, *p);
1562
      if (!q)
1563
        return XML_ERR_MISSING_ENCODING;
1564
1565
      if (encoding)
1566
        *encoding = xml_pnstrdup(p + 1, q - p - 1);
1567
      p = q + 1;
1568
    }
1569
    else
1570
      return XML_ERR_MISSING_ENCODING;
1571
  }
1572
  else
1573
  {
1574
    p = save_p;
1575
  }
1576
1577
  /* standalone */
1578
  save_p = p;
1579
  SKIP_XML_SPACE(p);
1580
  if (xmlStrncmp(p, (xmlChar *) "standalone", 10) == 0)
1581
  {
1582
    CHECK_XML_SPACE(save_p);
1583
    p += 10;
1584
    SKIP_XML_SPACE(p);
1585
    if (*p != '=')
1586
      return XML_ERR_STANDALONE_VALUE;
1587
    p += 1;
1588
    SKIP_XML_SPACE(p);
1589
    if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 ||
1590
      xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0)
1591
    {
1592
      if (standalone)
1593
        *standalone = 1;
1594
      p += 5;
1595
    }
1596
    else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 ||
1597
         xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0)
1598
    {
1599
      if (standalone)
1600
        *standalone = 0;
1601
      p += 4;
1602
    }
1603
    else
1604
      return XML_ERR_STANDALONE_VALUE;
1605
  }
1606
  else
1607
  {
1608
    p = save_p;
1609
  }
1610
1611
  SKIP_XML_SPACE(p);
1612
  if (xmlStrncmp(p, (xmlChar *) "?>", 2) != 0)
1613
    return XML_ERR_XMLDECL_NOT_FINISHED;
1614
  p += 2;
1615
1616
finished:
1617
  len = p - str;
1618
1619
  for (p = str; p < str + len; p++)
1620
    if (*p > 127)
1621
      return XML_ERR_INVALID_CHAR;
1622
1623
  if (lenp)
1624
    *lenp = len;
1625
1626
  return XML_ERR_OK;
1627
}
1628
1629
1630
/*
1631
 * Write an XML declaration.  On output, we adjust the XML declaration
1632
 * as follows.  (These rules are the moral equivalent of the clause
1633
 * "Serialization of an XML value" in the SQL standard.)
1634
 *
1635
 * We try to avoid generating an XML declaration if possible.  This is
1636
 * so that you don't get trivial things like xml '<foo/>' resulting in
1637
 * '<?xml version="1.0"?><foo/>', which would surely be annoying.  We
1638
 * must provide a declaration if the standalone property is specified
1639
 * or if we include an encoding declaration.  If we have a
1640
 * declaration, we must specify a version (XML requires this).
1641
 * Otherwise we only make a declaration if the version is not "1.0",
1642
 * which is the default version specified in SQL:2003.
1643
 */
1644
static bool
1645
print_xml_decl(StringInfo buf, const xmlChar *version,
1646
         pg_enc encoding, int standalone)
1647
{
1648
  if ((version && strcmp((const char *) version, PG_XML_DEFAULT_VERSION) != 0)
1649
    || (encoding && encoding != PG_UTF8)
1650
    || standalone != -1)
1651
  {
1652
    appendStringInfoString(buf, "<?xml");
1653
1654
    if (version)
1655
      appendStringInfo(buf, " version=\"%s\"", version);
1656
    else
1657
      appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION);
1658
1659
    if (encoding && encoding != PG_UTF8)
1660
    {
1661
      /*
1662
       * XXX might be useful to convert this to IANA names (ISO-8859-1
1663
       * instead of LATIN1 etc.); needs field experience
1664
       */
1665
      appendStringInfo(buf, " encoding=\"%s\"",
1666
               pg_encoding_to_char(encoding));
1667
    }
1668
1669
    if (standalone == 1)
1670
      appendStringInfoString(buf, " standalone=\"yes\"");
1671
    else if (standalone == 0)
1672
      appendStringInfoString(buf, " standalone=\"no\"");
1673
    appendStringInfoString(buf, "?>");
1674
1675
    return true;
1676
  }
1677
  else
1678
    return false;
1679
}
1680
1681
/*
1682
 * Test whether an input that is to be parsed as CONTENT contains a DTD.
1683
 *
1684
 * The SQL/XML:2003 definition of CONTENT ("XMLDecl? content") is not
1685
 * satisfied by a document with a DTD, which is a bit of a wart, as it means
1686
 * the CONTENT type is not a proper superset of DOCUMENT.  SQL/XML:2006 and
1687
 * later fix that, by redefining content with reference to the "more
1688
 * permissive" Document Node of the XQuery/XPath Data Model, such that any
1689
 * DOCUMENT value is indeed also a CONTENT value.  That definition is more
1690
 * useful, as CONTENT becomes usable for parsing input of unknown form (think
1691
 * pg_restore).
1692
 *
1693
 * As used below in parse_xml when parsing for CONTENT, libxml does not give
1694
 * us the 2006+ behavior, but only the 2003; it will choke if the input has
1695
 * a DTD.  But we can provide the 2006+ definition of CONTENT easily enough,
1696
 * by detecting this case first and simply doing the parse as DOCUMENT.
1697
 *
1698
 * A DTD can be found arbitrarily far in, but that would be a contrived case;
1699
 * it will ordinarily start within a few dozen characters.  The only things
1700
 * that can precede it are an XMLDecl (here, the caller will have called
1701
 * parse_xml_decl already), whitespace, comments, and processing instructions.
1702
 * This function need only return true if it sees a valid sequence of such
1703
 * things leading to <!DOCTYPE.  It can simply return false in any other
1704
 * cases, including malformed input; that will mean the input gets parsed as
1705
 * CONTENT as originally planned, with libxml reporting any errors.
1706
 *
1707
 * This is only to be called from xml_parse, when pg_xml_init has already
1708
 * been called.  The input is already in UTF8 encoding.
1709
 */
1710
static bool
1711
xml_doctype_in_content(const xmlChar *str)
1712
{
1713
  const xmlChar *p = str;
1714
1715
  for (;;)
1716
  {
1717
    const xmlChar *e;
1718
1719
    SKIP_XML_SPACE(p);
1720
    if (*p != '<')
1721
      return false;
1722
    p++;
1723
1724
    if (*p == '!')
1725
    {
1726
      p++;
1727
1728
      /* if we see <!DOCTYPE, we can return true */
1729
      if (xmlStrncmp(p, (xmlChar *) "DOCTYPE", 7) == 0)
1730
        return true;
1731
1732
      /* otherwise, if it's not a comment, fail */
1733
      if (xmlStrncmp(p, (xmlChar *) "--", 2) != 0)
1734
        return false;
1735
      /* find end of comment: find -- and a > must follow */
1736
      p = xmlStrstr(p + 2, (xmlChar *) "--");
1737
      if (!p || p[2] != '>')
1738
        return false;
1739
      /* advance over comment, and keep scanning */
1740
      p += 3;
1741
      continue;
1742
    }
1743
1744
    /* otherwise, if it's not a PI <?target something?>, fail */
1745
    if (*p != '?')
1746
      return false;
1747
    p++;
1748
1749
    /* find end of PI (the string ?> is forbidden within a PI) */
1750
    e = xmlStrstr(p, (xmlChar *) "?>");
1751
    if (!e)
1752
      return false;
1753
1754
    /* advance over PI, keep scanning */
1755
    p = e + 2;
1756
  }
1757
}
1758
1759
1760
/*
1761
 * Convert a text object to XML internal representation
1762
 *
1763
 * data is the source data (must not be toasted!), encoding is its encoding,
1764
 * and xmloption_arg and preserve_whitespace are options for the
1765
 * transformation.
1766
 *
1767
 * If parsed_xmloptiontype isn't NULL, *parsed_xmloptiontype is set to the
1768
 * XmlOptionType actually used to parse the input (typically the same as
1769
 * xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode).
1770
 *
1771
 * If parsed_nodes isn't NULL and we parse in CONTENT mode, the list
1772
 * of parsed nodes from the xmlParseBalancedChunkMemory call will be returned
1773
 * to *parsed_nodes.  (It is caller's responsibility to free that.)
1774
 *
1775
 * Errors normally result in ereport(ERROR), but if escontext is an
1776
 * ErrorSaveContext, then "safe" errors are reported there instead, and the
1777
 * caller must check SOFT_ERROR_OCCURRED() to see whether that happened.
1778
 *
1779
 * Note: it is caller's responsibility to xmlFreeDoc() the result,
1780
 * else a permanent memory leak will ensue!  But note the result could
1781
 * be NULL after a soft error.
1782
 *
1783
 * TODO maybe libxml2's xmlreader is better? (do not construct DOM,
1784
 * yet do not use SAX - see xmlreader.c)
1785
 */
1786
static xmlDocPtr
1787
xml_parse(text *data, XmlOptionType xmloption_arg,
1788
      bool preserve_whitespace, int encoding,
1789
      XmlOptionType *parsed_xmloptiontype, xmlNodePtr *parsed_nodes,
1790
      Node *escontext)
1791
{
1792
  int32   len;
1793
  xmlChar    *string;
1794
  xmlChar    *utf8string;
1795
  PgXmlErrorContext *xmlerrcxt;
1796
  volatile xmlParserCtxtPtr ctxt = NULL;
1797
  volatile xmlDocPtr doc = NULL;
1798
  volatile int save_keep_blanks = -1;
1799
1800
  /*
1801
   * This step looks annoyingly redundant, but we must do it to have a
1802
   * null-terminated string in case encoding conversion isn't required.
1803
   */
1804
  len = VARSIZE_ANY_EXHDR(data);  /* will be useful later */
1805
  string = xml_text2xmlChar(data);
1806
1807
  /*
1808
   * If the data isn't UTF8, we must translate before giving it to libxml.
1809
   *
1810
   * XXX ideally, we'd catch any encoding conversion failure and return a
1811
   * soft error.  However, failure to convert to UTF8 should be pretty darn
1812
   * rare, so for now this is left undone.
1813
   */
1814
  utf8string = pg_do_encoding_conversion(string,
1815
                       len,
1816
                       encoding,
1817
                       PG_UTF8);
1818
1819
  /* Start up libxml and its parser */
1820
  xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED);
1821
1822
  /* Use a TRY block to ensure we clean up correctly */
1823
  PG_TRY();
1824
  {
1825
    bool    parse_as_document = false;
1826
    int     res_code;
1827
    size_t    count = 0;
1828
    xmlChar    *version = NULL;
1829
    int     standalone = 0;
1830
1831
    /* Any errors here are reported as hard ereport's */
1832
    xmlInitParser();
1833
1834
    /* Decide whether to parse as document or content */
1835
    if (xmloption_arg == XMLOPTION_DOCUMENT)
1836
      parse_as_document = true;
1837
    else
1838
    {
1839
      /* Parse and skip over the XML declaration, if any */
1840
      res_code = parse_xml_decl(utf8string,
1841
                    &count, &version, NULL, &standalone);
1842
      if (res_code != 0)
1843
      {
1844
        errsave(escontext,
1845
            errcode(ERRCODE_INVALID_XML_CONTENT),
1846
            errmsg_internal("invalid XML content: invalid XML declaration"),
1847
            errdetail_for_xml_code(res_code));
1848
        goto fail;
1849
      }
1850
1851
      /* Is there a DOCTYPE element? */
1852
      if (xml_doctype_in_content(utf8string + count))
1853
        parse_as_document = true;
1854
    }
1855
1856
    /* initialize output parameters */
1857
    if (parsed_xmloptiontype != NULL)
1858
      *parsed_xmloptiontype = parse_as_document ? XMLOPTION_DOCUMENT :
1859
        XMLOPTION_CONTENT;
1860
    if (parsed_nodes != NULL)
1861
      *parsed_nodes = NULL;
1862
1863
    if (parse_as_document)
1864
    {
1865
      int     options;
1866
1867
      /* set up parser context used by xmlCtxtReadDoc */
1868
      ctxt = xmlNewParserCtxt();
1869
      if (ctxt == NULL || xmlerrcxt->err_occurred)
1870
        xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1871
              "could not allocate parser context");
1872
1873
      /*
1874
       * Select parse options.
1875
       *
1876
       * Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
1877
       * according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined
1878
       * by internal DTD are applied'.  As for external DTDs, we try to
1879
       * support them too (see SQL/XML:2008 GR 10.16.7.e), but that
1880
       * doesn't really happen because xmlPgEntityLoader prevents it.
1881
       */
1882
      options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
1883
        | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
1884
1885
      doc = xmlCtxtReadDoc(ctxt, utf8string,
1886
                 NULL,  /* no URL */
1887
                 "UTF-8",
1888
                 options);
1889
1890
      if (doc == NULL || xmlerrcxt->err_occurred)
1891
      {
1892
        /* Use original option to decide which error code to report */
1893
        if (xmloption_arg == XMLOPTION_DOCUMENT)
1894
          xml_errsave(escontext, xmlerrcxt,
1895
                ERRCODE_INVALID_XML_DOCUMENT,
1896
                "invalid XML document");
1897
        else
1898
          xml_errsave(escontext, xmlerrcxt,
1899
                ERRCODE_INVALID_XML_CONTENT,
1900
                "invalid XML content");
1901
        goto fail;
1902
      }
1903
    }
1904
    else
1905
    {
1906
      /* set up document that xmlParseBalancedChunkMemory will add to */
1907
      doc = xmlNewDoc(version);
1908
      if (doc == NULL || xmlerrcxt->err_occurred)
1909
        xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1910
              "could not allocate XML document");
1911
1912
      Assert(doc->encoding == NULL);
1913
      doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
1914
      if (doc->encoding == NULL || xmlerrcxt->err_occurred)
1915
        xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1916
              "could not allocate XML document");
1917
      doc->standalone = standalone;
1918
1919
      /* set parse options --- have to do this the ugly way */
1920
      save_keep_blanks = xmlKeepBlanksDefault(preserve_whitespace ? 1 : 0);
1921
1922
      /* allow empty content */
1923
      if (*(utf8string + count))
1924
      {
1925
        res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
1926
                             utf8string + count,
1927
                             parsed_nodes);
1928
        if (res_code != 0 || xmlerrcxt->err_occurred)
1929
        {
1930
          xml_errsave(escontext, xmlerrcxt,
1931
                ERRCODE_INVALID_XML_CONTENT,
1932
                "invalid XML content");
1933
          goto fail;
1934
        }
1935
      }
1936
    }
1937
1938
fail:
1939
    ;
1940
  }
1941
  PG_CATCH();
1942
  {
1943
    if (save_keep_blanks != -1)
1944
      xmlKeepBlanksDefault(save_keep_blanks);
1945
    if (doc != NULL)
1946
      xmlFreeDoc(doc);
1947
    if (ctxt != NULL)
1948
      xmlFreeParserCtxt(ctxt);
1949
1950
    pg_xml_done(xmlerrcxt, true);
1951
1952
    PG_RE_THROW();
1953
  }
1954
  PG_END_TRY();
1955
1956
  if (save_keep_blanks != -1)
1957
    xmlKeepBlanksDefault(save_keep_blanks);
1958
1959
  if (ctxt != NULL)
1960
    xmlFreeParserCtxt(ctxt);
1961
1962
  pg_xml_done(xmlerrcxt, false);
1963
1964
  return doc;
1965
}
1966
1967
1968
/*
1969
 * xmlChar<->text conversions
1970
 */
1971
static xmlChar *
1972
xml_text2xmlChar(text *in)
1973
{
1974
  return (xmlChar *) text_to_cstring(in);
1975
}
1976
1977
1978
#ifdef USE_LIBXMLCONTEXT
1979
1980
/*
1981
 * Manage the special context used for all libxml allocations (but only
1982
 * in special debug builds; see notes at top of file)
1983
 */
1984
static void
1985
xml_memory_init(void)
1986
{
1987
  /* Create memory context if not there already */
1988
  if (LibxmlContext == NULL)
1989
    LibxmlContext = AllocSetContextCreate(TopMemoryContext,
1990
                        "Libxml context",
1991
                        ALLOCSET_DEFAULT_SIZES);
1992
1993
  /* Re-establish the callbacks even if already set */
1994
  xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
1995
}
1996
1997
/*
1998
 * Wrappers for memory management functions
1999
 */
2000
static void *
2001
xml_palloc(size_t size)
2002
{
2003
  return MemoryContextAlloc(LibxmlContext, size);
2004
}
2005
2006
2007
static void *
xml_repalloc(void *ptr, size_t size)
{
	/* Reallocation callback for libxml: simply defer to repalloc() */
	void	   *resized;

	resized = repalloc(ptr, size);
	return resized;
}
2012
2013
2014
static void
xml_pfree(void *ptr)
{
	/*
	 * Free callback for libxml.  Some parts of libxml assume that
	 * xmlFree(NULL) is allowed, so a null pointer is silently ignored.
	 */
	if (ptr == NULL)
		return;

	pfree(ptr);
}
2021
2022
2023
static char *
2024
xml_pstrdup(const char *string)
2025
{
2026
  return MemoryContextStrdup(LibxmlContext, string);
2027
}
2028
#endif              /* USE_LIBXMLCONTEXT */
2029
2030
2031
/*
2032
 * xmlPgEntityLoader --- entity loader callback function
2033
 *
2034
 * Silently prevent any external entity URL from being loaded.  We don't want
2035
 * to throw an error, so instead make the entity appear to expand to an empty
2036
 * string.
2037
 *
2038
 * We would prefer to allow loading entities that exist in the system's
2039
 * global XML catalog; but the available libxml2 APIs make that a complex
2040
 * and fragile task.  For now, just shut down all external access.
2041
 */
2042
static xmlParserInputPtr
2043
xmlPgEntityLoader(const char *URL, const char *ID,
2044
          xmlParserCtxtPtr ctxt)
2045
{
2046
  return xmlNewStringInputStream(ctxt, (const xmlChar *) "");
2047
}
2048
2049
2050
/*
2051
 * xml_ereport --- report an XML-related error
2052
 *
2053
 * The "msg" is the SQL-level message; some can be adopted from the SQL/XML
2054
 * standard.  This function adds libxml's native error message, if any, as
2055
 * detail.
2056
 *
2057
 * This is exported for modules that want to share the core libxml error
2058
 * handler.  Note that pg_xml_init() *must* have been called previously.
2059
 */
2060
void
2061
xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
2062
{
2063
  char     *detail;
2064
2065
  /* Defend against someone passing us a bogus context struct */
2066
  if (errcxt->magic != ERRCXT_MAGIC)
2067
    elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext");
2068
2069
  /* Flag that the current libxml error has been reported */
2070
  errcxt->err_occurred = false;
2071
2072
  /* Include detail only if we have some text from libxml */
2073
  if (errcxt->err_buf.len > 0)
2074
    detail = errcxt->err_buf.data;
2075
  else
2076
    detail = NULL;
2077
2078
  ereport(level,
2079
      (errcode(sqlcode),
2080
       errmsg_internal("%s", msg),
2081
       detail ? errdetail_internal("%s", detail) : 0));
2082
}
2083
2084
2085
/*
2086
 * xml_errsave --- save an XML-related error
2087
 *
2088
 * If escontext is an ErrorSaveContext, error details are saved into it,
2089
 * and control returns normally.
2090
 *
2091
 * Otherwise, the error is thrown, so that this is equivalent to
2092
 * xml_ereport() with level == ERROR.
2093
 *
2094
 * This should be used only for errors that we're sure we do not need
2095
 * a transaction abort to clean up after.
2096
 */
2097
static void
2098
xml_errsave(Node *escontext, PgXmlErrorContext *errcxt,
2099
      int sqlcode, const char *msg)
2100
{
2101
  char     *detail;
2102
2103
  /* Defend against someone passing us a bogus context struct */
2104
  if (errcxt->magic != ERRCXT_MAGIC)
2105
    elog(ERROR, "xml_errsave called with invalid PgXmlErrorContext");
2106
2107
  /* Flag that the current libxml error has been reported */
2108
  errcxt->err_occurred = false;
2109
2110
  /* Include detail only if we have some text from libxml */
2111
  if (errcxt->err_buf.len > 0)
2112
    detail = errcxt->err_buf.data;
2113
  else
2114
    detail = NULL;
2115
2116
  errsave(escontext,
2117
      (errcode(sqlcode),
2118
       errmsg_internal("%s", msg),
2119
       detail ? errdetail_internal("%s", detail) : 0));
2120
}
2121
2122
2123
/*
2124
 * Error handler for libxml errors and warnings
2125
 */
2126
static void
2127
xml_errorHandler(void *data, PgXmlErrorPtr error)
2128
{
2129
  PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data;
2130
  xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt;
2131
  xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL;
2132
  xmlNodePtr  node = error->node;
2133
  const xmlChar *name = (node != NULL &&
2134
               node->type == XML_ELEMENT_NODE) ? node->name : NULL;
2135
  int     domain = error->domain;
2136
  int     level = error->level;
2137
  StringInfo  errorBuf;
2138
2139
  /*
2140
   * Defend against someone passing us a bogus context struct.
2141
   *
2142
   * We force a backend exit if this check fails because longjmp'ing out of
2143
   * libxml would likely render it unsafe to use further.
2144
   */
2145
  if (xmlerrcxt->magic != ERRCXT_MAGIC)
2146
    elog(FATAL, "xml_errorHandler called with invalid PgXmlErrorContext");
2147
2148
  /*----------
2149
   * Older libxml versions report some errors differently.
2150
   * First, some errors were previously reported as coming from the parser
2151
   * domain but are now reported as coming from the namespace domain.
2152
   * Second, some warnings were upgraded to errors.
2153
   * We attempt to compensate for that here.
2154
   *----------
2155
   */
2156
  switch (error->code)
2157
  {
2158
    case XML_WAR_NS_URI:
2159
      level = XML_ERR_ERROR;
2160
      domain = XML_FROM_NAMESPACE;
2161
      break;
2162
2163
    case XML_ERR_NS_DECL_ERROR:
2164
    case XML_WAR_NS_URI_RELATIVE:
2165
    case XML_WAR_NS_COLUMN:
2166
    case XML_NS_ERR_XML_NAMESPACE:
2167
    case XML_NS_ERR_UNDEFINED_NAMESPACE:
2168
    case XML_NS_ERR_QNAME:
2169
    case XML_NS_ERR_ATTRIBUTE_REDEFINED:
2170
    case XML_NS_ERR_EMPTY:
2171
      domain = XML_FROM_NAMESPACE;
2172
      break;
2173
  }
2174
2175
  /* Decide whether to act on the error or not */
2176
  switch (domain)
2177
  {
2178
    case XML_FROM_PARSER:
2179
2180
      /*
2181
       * XML_ERR_NOT_WELL_BALANCED is typically reported after some
2182
       * other, more on-point error.  Furthermore, libxml2 2.13 reports
2183
       * it under a completely different set of rules than prior
2184
       * versions.  To avoid cross-version behavioral differences,
2185
       * suppress it so long as we already logged some error.
2186
       */
2187
      if (error->code == XML_ERR_NOT_WELL_BALANCED &&
2188
        xmlerrcxt->err_occurred)
2189
        return;
2190
      /* fall through */
2191
2192
    case XML_FROM_NONE:
2193
    case XML_FROM_MEMORY:
2194
    case XML_FROM_IO:
2195
2196
      /*
2197
       * Suppress warnings about undeclared entities.  We need to do
2198
       * this to avoid problems due to not loading DTD definitions.
2199
       */
2200
      if (error->code == XML_WAR_UNDECLARED_ENTITY)
2201
        return;
2202
2203
      /* Otherwise, accept error regardless of the parsing purpose */
2204
      break;
2205
2206
    default:
2207
      /* Ignore error if only doing well-formedness check */
2208
      if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED)
2209
        return;
2210
      break;
2211
  }
2212
2213
  /* Prepare error message in errorBuf */
2214
  errorBuf = makeStringInfo();
2215
2216
  if (error->line > 0)
2217
    appendStringInfo(errorBuf, "line %d: ", error->line);
2218
  if (name != NULL)
2219
    appendStringInfo(errorBuf, "element %s: ", name);
2220
  if (error->message != NULL)
2221
    appendStringInfoString(errorBuf, error->message);
2222
  else
2223
    appendStringInfoString(errorBuf, "(no message provided)");
2224
2225
  /*
2226
   * Append context information to errorBuf.
2227
   *
2228
   * xmlParserPrintFileContext() uses libxml's "generic" error handler to
2229
   * write the context.  Since we don't want to duplicate libxml
2230
   * functionality here, we set up a generic error handler temporarily.
2231
   *
2232
   * We use appendStringInfo() directly as libxml's generic error handler.
2233
   * This should work because it has essentially the same signature as
2234
   * libxml expects, namely (void *ptr, const char *msg, ...).
2235
   */
2236
  if (input != NULL)
2237
  {
2238
    xmlGenericErrorFunc errFuncSaved = xmlGenericError;
2239
    void     *errCtxSaved = xmlGenericErrorContext;
2240
2241
    xmlSetGenericErrorFunc(errorBuf,
2242
                 (xmlGenericErrorFunc) appendStringInfo);
2243
2244
    /* Add context information to errorBuf */
2245
    appendStringInfoLineSeparator(errorBuf);
2246
2247
    xmlParserPrintFileContext(input);
2248
2249
    /* Restore generic error func */
2250
    xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved);
2251
  }
2252
2253
  /* Get rid of any trailing newlines in errorBuf */
2254
  chopStringInfoNewlines(errorBuf);
2255
2256
  /*
2257
   * Legacy error handling mode.  err_occurred is never set, we just add the
2258
   * message to err_buf.  This mode exists because the xml2 contrib module
2259
   * uses our error-handling infrastructure, but we don't want to change its
2260
   * behaviour since it's deprecated anyway.  This is also why we don't
2261
   * distinguish between notices, warnings and errors here --- the old-style
2262
   * generic error handler wouldn't have done that either.
2263
   */
2264
  if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY)
2265
  {
2266
    appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
2267
    appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
2268
                 errorBuf->len);
2269
2270
    destroyStringInfo(errorBuf);
2271
    return;
2272
  }
2273
2274
  /*
2275
   * We don't want to ereport() here because that'd probably leave libxml in
2276
   * an inconsistent state.  Instead, we remember the error and ereport()
2277
   * from xml_ereport().
2278
   *
2279
   * Warnings and notices can be reported immediately since they won't cause
2280
   * a longjmp() out of libxml.
2281
   */
2282
  if (level >= XML_ERR_ERROR)
2283
  {
2284
    appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
2285
    appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
2286
                 errorBuf->len);
2287
2288
    xmlerrcxt->err_occurred = true;
2289
  }
2290
  else if (level >= XML_ERR_WARNING)
2291
  {
2292
    ereport(WARNING,
2293
        (errmsg_internal("%s", errorBuf->data)));
2294
  }
2295
  else
2296
  {
2297
    ereport(NOTICE,
2298
        (errmsg_internal("%s", errorBuf->data)));
2299
  }
2300
2301
  destroyStringInfo(errorBuf);
2302
}
2303
2304
2305
/*
2306
 * Convert libxml error codes into textual errdetail messages.
2307
 *
2308
 * This should be called within an ereport or errsave invocation,
2309
 * just as errdetail would be.
2310
 *
2311
 * At the moment, we only need to cover those codes that we
2312
 * may raise in this file.
2313
 */
2314
static int
2315
errdetail_for_xml_code(int code)
2316
{
2317
  const char *det;
2318
2319
  switch (code)
2320
  {
2321
    case XML_ERR_INVALID_CHAR:
2322
      det = gettext_noop("Invalid character value.");
2323
      break;
2324
    case XML_ERR_SPACE_REQUIRED:
2325
      det = gettext_noop("Space required.");
2326
      break;
2327
    case XML_ERR_STANDALONE_VALUE:
2328
      det = gettext_noop("standalone accepts only 'yes' or 'no'.");
2329
      break;
2330
    case XML_ERR_VERSION_MISSING:
2331
      det = gettext_noop("Malformed declaration: missing version.");
2332
      break;
2333
    case XML_ERR_MISSING_ENCODING:
2334
      det = gettext_noop("Missing encoding in text declaration.");
2335
      break;
2336
    case XML_ERR_XMLDECL_NOT_FINISHED:
2337
      det = gettext_noop("Parsing XML declaration: '?>' expected.");
2338
      break;
2339
    default:
2340
      det = gettext_noop("Unrecognized libxml error code: %d.");
2341
      break;
2342
  }
2343
2344
  return errdetail(det, code);
2345
}
2346
2347
2348
/*
2349
 * Remove all trailing newlines from a StringInfo string
2350
 */
2351
static void
2352
chopStringInfoNewlines(StringInfo str)
2353
{
2354
  while (str->len > 0 && str->data[str->len - 1] == '\n')
2355
    str->data[--str->len] = '\0';
2356
}
2357
2358
2359
/*
2360
 * Append a newline after removing any existing trailing newlines
2361
 */
2362
static void
2363
appendStringInfoLineSeparator(StringInfo str)
2364
{
2365
  chopStringInfoNewlines(str);
2366
  if (str->len > 0)
2367
    appendStringInfoChar(str, '\n');
2368
}
2369
2370
2371
/*
2372
 * Convert one char in the current server encoding to a Unicode codepoint.
2373
 */
2374
static pg_wchar
2375
sqlchar_to_unicode(const char *s)
2376
{
2377
  char     *utf8string;
2378
  pg_wchar  ret[2];     /* need space for trailing zero */
2379
2380
  /* note we're not assuming s is null-terminated */
2381
  utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
2382
2383
  pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
2384
                  pg_encoding_mblen(PG_UTF8, utf8string));
2385
2386
  if (utf8string != s)
2387
    pfree(utf8string);
2388
2389
  return ret[0];
2390
}
2391
2392
2393
static bool
2394
is_valid_xml_namefirst(pg_wchar c)
2395
{
2396
  /* (Letter | '_' | ':') */
2397
  return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2398
      || c == '_' || c == ':');
2399
}
2400
2401
2402
static bool
2403
is_valid_xml_namechar(pg_wchar c)
2404
{
2405
  /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
2406
  return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2407
      || xmlIsDigitQ(c)
2408
      || c == '.' || c == '-' || c == '_' || c == ':'
2409
      || xmlIsCombiningQ(c)
2410
      || xmlIsExtenderQ(c));
2411
}
2412
#endif              /* USE_LIBXML */
2413
2414
2415
/*
2416
 * Map SQL identifier to XML name; see SQL/XML:2008 section 9.1.
2417
 */
2418
char *
map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
							   bool escape_period)
{
#ifdef USE_LIBXML
	StringInfoData buf;
	const char *p;

	/*
	 * escape_period without fully_escaped is a combination SQL/XML never
	 * uses, so asking for it is probably a mistake.
	 */
	Assert(fully_escaped || !escape_period);

	initStringInfo(&buf);

	p = ident;
	while (*p != '\0')
	{
		bool		at_start = (p == ident);
		const char *fixed = NULL;	/* canned escape, when one applies */

		/* First see whether one of the fixed escape sequences applies */
		if (*p == ':' && (at_start || fully_escaped))
			fixed = "_x003A_";
		else if (*p == '_' && p[1] == 'x')
			fixed = "_x005F_";
		else if (fully_escaped && at_start &&
				 pg_strncasecmp(p, "xml", 3) == 0)
			fixed = (*p == 'x') ? "_x0078_" : "_x0058_";
		else if (escape_period && *p == '.')
			fixed = "_x002E_";

		if (fixed != NULL)
			appendStringInfoString(&buf, fixed);
		else
		{
			/* Otherwise escape only what is not a legal XML name char */
			pg_wchar	u = sqlchar_to_unicode(p);
			bool		legal;

			legal = at_start
				? is_valid_xml_namefirst(u)
				: is_valid_xml_namechar(u);

			if (legal)
				appendBinaryStringInfo(&buf, p, pg_mblen(p));
			else
				appendStringInfo(&buf, "_x%04X_", (unsigned int) u);
		}

		/* Step over one (possibly multibyte) character */
		p += pg_mblen(p);
	}

	return buf.data;
#else							/* not USE_LIBXML */
	NO_XML_SUPPORT();
	return NULL;
#endif							/* not USE_LIBXML */
}
2469
2470
2471
/*
2472
 * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3.
2473
 */
2474
char *
2475
map_xml_name_to_sql_identifier(const char *name)
2476
0
{
2477
0
  StringInfoData buf;
2478
0
  const char *p;
2479
2480
0
  initStringInfo(&buf);
2481
2482
0
  for (p = name; *p; p += pg_mblen(p))
2483
0
  {
2484
0
    if (*p == '_' && *(p + 1) == 'x'
2485
0
      && isxdigit((unsigned char) *(p + 2))
2486
0
      && isxdigit((unsigned char) *(p + 3))
2487
0
      && isxdigit((unsigned char) *(p + 4))
2488
0
      && isxdigit((unsigned char) *(p + 5))
2489
0
      && *(p + 6) == '_')
2490
0
    {
2491
0
      char    cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
2492
0
      unsigned int u;
2493
2494
0
      sscanf(p + 2, "%X", &u);
2495
0
      pg_unicode_to_server(u, (unsigned char *) cbuf);
2496
0
      appendStringInfoString(&buf, cbuf);
2497
0
      p += 6;
2498
0
    }
2499
0
    else
2500
0
      appendBinaryStringInfo(&buf, p, pg_mblen(p));
2501
0
  }
2502
2503
0
  return buf.data;
2504
0
}
2505
2506
/*
2507
 * Map SQL value to XML value; see SQL/XML:2008 section 9.8.
2508
 *
2509
 * When xml_escape_strings is true, then certain characters in string
2510
 * values are replaced by entity references (&lt; etc.), as specified
2511
 * in SQL/XML:2008 section 9.8 GR 9) a) iii).   This is normally what is
2512
 * wanted.  The false case is mainly useful when the resulting value
2513
 * is used with xmlTextWriterWriteAttribute() to write out an
2514
 * attribute, because that function does the escaping itself.
2515
 */
2516
char *
2517
map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings)
2518
0
{
2519
0
  if (type_is_array_domain(type))
2520
0
  {
2521
0
    ArrayType  *array;
2522
0
    Oid     elmtype;
2523
0
    int16   elmlen;
2524
0
    bool    elmbyval;
2525
0
    char    elmalign;
2526
0
    int     num_elems;
2527
0
    Datum    *elem_values;
2528
0
    bool     *elem_nulls;
2529
0
    StringInfoData buf;
2530
0
    int     i;
2531
2532
0
    array = DatumGetArrayTypeP(value);
2533
0
    elmtype = ARR_ELEMTYPE(array);
2534
0
    get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);
2535
2536
0
    deconstruct_array(array, elmtype,
2537
0
              elmlen, elmbyval, elmalign,
2538
0
              &elem_values, &elem_nulls,
2539
0
              &num_elems);
2540
2541
0
    initStringInfo(&buf);
2542
2543
0
    for (i = 0; i < num_elems; i++)
2544
0
    {
2545
0
      if (elem_nulls[i])
2546
0
        continue;
2547
0
      appendStringInfoString(&buf, "<element>");
2548
0
      appendStringInfoString(&buf,
2549
0
                   map_sql_value_to_xml_value(elem_values[i],
2550
0
                                elmtype, true));
2551
0
      appendStringInfoString(&buf, "</element>");
2552
0
    }
2553
2554
0
    pfree(elem_values);
2555
0
    pfree(elem_nulls);
2556
2557
0
    return buf.data;
2558
0
  }
2559
0
  else
2560
0
  {
2561
0
    Oid     typeOut;
2562
0
    bool    isvarlena;
2563
0
    char     *str;
2564
2565
    /*
2566
     * Flatten domains; the special-case treatments below should apply to,
2567
     * eg, domains over boolean not just boolean.
2568
     */
2569
0
    type = getBaseType(type);
2570
2571
    /*
2572
     * Special XSD formatting for some data types
2573
     */
2574
0
    switch (type)
2575
0
    {
2576
0
      case BOOLOID:
2577
0
        if (DatumGetBool(value))
2578
0
          return "true";
2579
0
        else
2580
0
          return "false";
2581
2582
0
      case DATEOID:
2583
0
        {
2584
0
          DateADT   date;
2585
0
          struct pg_tm tm;
2586
0
          char    buf[MAXDATELEN + 1];
2587
2588
0
          date = DatumGetDateADT(value);
2589
          /* XSD doesn't support infinite values */
2590
0
          if (DATE_NOT_FINITE(date))
2591
0
            ereport(ERROR,
2592
0
                (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2593
0
                 errmsg("date out of range"),
2594
0
                 errdetail("XML does not support infinite date values.")));
2595
0
          j2date(date + POSTGRES_EPOCH_JDATE,
2596
0
               &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
2597
0
          EncodeDateOnly(&tm, USE_XSD_DATES, buf);
2598
2599
0
          return pstrdup(buf);
2600
0
        }
2601
2602
0
      case TIMESTAMPOID:
2603
0
        {
2604
0
          Timestamp timestamp;
2605
0
          struct pg_tm tm;
2606
0
          fsec_t    fsec;
2607
0
          char    buf[MAXDATELEN + 1];
2608
2609
0
          timestamp = DatumGetTimestamp(value);
2610
2611
          /* XSD doesn't support infinite values */
2612
0
          if (TIMESTAMP_NOT_FINITE(timestamp))
2613
0
            ereport(ERROR,
2614
0
                (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2615
0
                 errmsg("timestamp out of range"),
2616
0
                 errdetail("XML does not support infinite timestamp values.")));
2617
0
          else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
2618
0
            EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
2619
0
          else
2620
0
            ereport(ERROR,
2621
0
                (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2622
0
                 errmsg("timestamp out of range")));
2623
2624
0
          return pstrdup(buf);
2625
0
        }
2626
2627
0
      case TIMESTAMPTZOID:
2628
0
        {
2629
0
          TimestampTz timestamp;
2630
0
          struct pg_tm tm;
2631
0
          int     tz;
2632
0
          fsec_t    fsec;
2633
0
          const char *tzn = NULL;
2634
0
          char    buf[MAXDATELEN + 1];
2635
2636
0
          timestamp = DatumGetTimestamp(value);
2637
2638
          /* XSD doesn't support infinite values */
2639
0
          if (TIMESTAMP_NOT_FINITE(timestamp))
2640
0
            ereport(ERROR,
2641
0
                (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2642
0
                 errmsg("timestamp out of range"),
2643
0
                 errdetail("XML does not support infinite timestamp values.")));
2644
0
          else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0)
2645
0
            EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
2646
0
          else
2647
0
            ereport(ERROR,
2648
0
                (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2649
0
                 errmsg("timestamp out of range")));
2650
2651
0
          return pstrdup(buf);
2652
0
        }
2653
2654
#ifdef USE_LIBXML
2655
      case BYTEAOID:
2656
        {
2657
          bytea    *bstr = DatumGetByteaPP(value);
2658
          PgXmlErrorContext *xmlerrcxt;
2659
          volatile xmlBufferPtr buf = NULL;
2660
          volatile xmlTextWriterPtr writer = NULL;
2661
          char     *result;
2662
2663
          xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
2664
2665
          PG_TRY();
2666
          {
2667
            buf = xmlBufferCreate();
2668
            if (buf == NULL || xmlerrcxt->err_occurred)
2669
              xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2670
                    "could not allocate xmlBuffer");
2671
            writer = xmlNewTextWriterMemory(buf, 0);
2672
            if (writer == NULL || xmlerrcxt->err_occurred)
2673
              xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2674
                    "could not allocate xmlTextWriter");
2675
2676
            if (xmlbinary == XMLBINARY_BASE64)
2677
              xmlTextWriterWriteBase64(writer, VARDATA_ANY(bstr),
2678
                           0, VARSIZE_ANY_EXHDR(bstr));
2679
            else
2680
              xmlTextWriterWriteBinHex(writer, VARDATA_ANY(bstr),
2681
                           0, VARSIZE_ANY_EXHDR(bstr));
2682
2683
            /* we MUST do this now to flush data out to the buffer */
2684
            xmlFreeTextWriter(writer);
2685
            writer = NULL;
2686
2687
            result = pstrdup((const char *) xmlBufferContent(buf));
2688
          }
2689
          PG_CATCH();
2690
          {
2691
            if (writer)
2692
              xmlFreeTextWriter(writer);
2693
            if (buf)
2694
              xmlBufferFree(buf);
2695
2696
            pg_xml_done(xmlerrcxt, true);
2697
2698
            PG_RE_THROW();
2699
          }
2700
          PG_END_TRY();
2701
2702
          xmlBufferFree(buf);
2703
2704
          pg_xml_done(xmlerrcxt, false);
2705
2706
          return result;
2707
        }
2708
#endif              /* USE_LIBXML */
2709
2710
0
    }
2711
2712
    /*
2713
     * otherwise, just use the type's native text representation
2714
     */
2715
0
    getTypeOutputInfo(type, &typeOut, &isvarlena);
2716
0
    str = OidOutputFunctionCall(typeOut, value);
2717
2718
    /* ... exactly as-is for XML, and when escaping is not wanted */
2719
0
    if (type == XMLOID || !xml_escape_strings)
2720
0
      return str;
2721
2722
    /* otherwise, translate special characters as needed */
2723
0
    return escape_xml(str);
2724
0
  }
2725
0
}
2726
2727
2728
/*
2729
 * Escape characters in text that have special meanings in XML.
2730
 *
2731
 * Returns a palloc'd string.
2732
 *
2733
 * NB: this is intentionally not dependent on libxml.
2734
 */
2735
char *
2736
escape_xml(const char *str)
2737
0
{
2738
0
  StringInfoData buf;
2739
0
  const char *p;
2740
2741
0
  initStringInfo(&buf);
2742
0
  for (p = str; *p; p++)
2743
0
  {
2744
0
    switch (*p)
2745
0
    {
2746
0
      case '&':
2747
0
        appendStringInfoString(&buf, "&amp;");
2748
0
        break;
2749
0
      case '<':
2750
0
        appendStringInfoString(&buf, "&lt;");
2751
0
        break;
2752
0
      case '>':
2753
0
        appendStringInfoString(&buf, "&gt;");
2754
0
        break;
2755
0
      case '\r':
2756
0
        appendStringInfoString(&buf, "&#x0d;");
2757
0
        break;
2758
0
      default:
2759
0
        appendStringInfoCharMacro(&buf, *p);
2760
0
        break;
2761
0
    }
2762
0
  }
2763
0
  return buf.data;
2764
0
}
2765
2766
2767
static char *
_SPI_strdup(const char *s)
{
	/* Duplicate s, terminator included, into memory from SPI_palloc() */
	size_t		nbytes = strlen(s) + 1;
	char	   *copy;

	copy = SPI_palloc(nbytes);
	memcpy(copy, s, nbytes);

	return copy;
}
2776
2777
2778
/*
2779
 * SQL to XML mapping functions
2780
 *
2781
 * What follows below was at one point intentionally organized so that
2782
 * you can read along in the SQL/XML standard. The functions are
2783
 * mostly split up the way the clauses lay out in the standards
2784
 * document, and the identifiers are also aligned with the standard
2785
 * text.  Unfortunately, SQL/XML:2006 reordered the clauses
2786
 * differently than SQL/XML:2003, so the order below doesn't make much
2787
 * sense anymore.
2788
 *
2789
 * There are many things going on there:
2790
 *
2791
 * There are two kinds of mappings: Mapping SQL data (table contents)
2792
 * to XML documents, and mapping SQL structure (the "schema") to XML
2793
 * Schema.  And there are functions that do both at the same time.
2794
 *
2795
 * Then you can map a database, a schema, or a table, each in both
2796
 * ways.  This breaks down recursively: Mapping a database invokes
2797
 * mapping schemas, which invokes mapping tables, which invokes
2798
 * mapping rows, which invokes mapping columns, although you can't
2799
 * call the last two from the outside.  Because of this, there are a
2800
 * number of xyz_internal() functions which are to be called both from
2801
 * the function manager wrapper and from some upper layer in a
2802
 * recursive call.
2803
 *
2804
 * See the documentation about what the common function arguments
2805
 * nulls, tableforest, and targetns mean.
2806
 *
2807
 * Some style guidelines for XML output: Use double quotes for quoting
2808
 * XML attributes.  Indent XML elements by two spaces, but remember
2809
 * that a lot of code is called recursively at different levels, so
2810
 * it's better not to indent rather than create output that indents
2811
 * and outdents weirdly.  Add newlines to make the output look nice.
2812
 */
2813
2814
2815
/*
2816
 * Visibility of objects for XML mappings; see SQL/XML:2008 section
2817
 * 4.10.8.
2818
 */
2819
2820
/*
2821
 * Given a query, which must return type oid as first column, produce
2822
 * a list of Oids with the query results.
2823
 */
2824
static List *
2825
query_to_oid_list(const char *query)
2826
0
{
2827
0
  uint64    i;
2828
0
  List     *list = NIL;
2829
0
  int     spi_result;
2830
2831
0
  spi_result = SPI_execute(query, true, 0);
2832
0
  if (spi_result != SPI_OK_SELECT)
2833
0
    elog(ERROR, "SPI_execute returned %s for %s",
2834
0
       SPI_result_code_string(spi_result), query);
2835
2836
0
  for (i = 0; i < SPI_processed; i++)
2837
0
  {
2838
0
    Datum   oid;
2839
0
    bool    isnull;
2840
2841
0
    oid = SPI_getbinval(SPI_tuptable->vals[i],
2842
0
              SPI_tuptable->tupdesc,
2843
0
              1,
2844
0
              &isnull);
2845
0
    if (!isnull)
2846
0
      list = lappend_oid(list, DatumGetObjectId(oid));
2847
0
  }
2848
2849
0
  return list;
2850
0
}
2851
2852
2853
static List *
2854
schema_get_xml_visible_tables(Oid nspid)
2855
0
{
2856
0
  StringInfoData query;
2857
2858
0
  initStringInfo(&query);
2859
0
  appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class"
2860
0
           " WHERE relnamespace = %u AND relkind IN ("
2861
0
           CppAsString2(RELKIND_RELATION) ","
2862
0
           CppAsString2(RELKIND_MATVIEW) ","
2863
0
           CppAsString2(RELKIND_VIEW) ")"
2864
0
           " AND pg_catalog.has_table_privilege (oid, 'SELECT')"
2865
0
           " ORDER BY relname;", nspid);
2866
2867
0
  return query_to_oid_list(query.data);
2868
0
}
2869
2870
2871
/*
2872
 * Including the system schemas is probably not useful for a database
2873
 * mapping.
2874
 */
2875
/* SQL predicate matching the schemas that are excluded from mappings */
#define XML_VISIBLE_SCHEMAS_EXCLUDE \
	"(nspname ~ '^pg_'" \
	" OR nspname = 'information_schema')"
2876
2877
0
/* Query for schemas with USAGE privilege, minus the excluded ones */
#define XML_VISIBLE_SCHEMAS \
	"SELECT oid FROM pg_catalog.pg_namespace" \
	" WHERE pg_catalog.has_schema_privilege (oid, 'USAGE')" \
	" AND NOT " XML_VISIBLE_SCHEMAS_EXCLUDE
2878
2879
2880
static List *
2881
database_get_xml_visible_schemas(void)
2882
0
{
2883
0
  return query_to_oid_list(XML_VISIBLE_SCHEMAS " ORDER BY nspname;");
2884
0
}
2885
2886
2887
static List *
2888
database_get_xml_visible_tables(void)
2889
0
{
2890
  /* At the moment there is no order required here. */
2891
0
  return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class"
2892
0
               " WHERE relkind IN ("
2893
0
               CppAsString2(RELKIND_RELATION) ","
2894
0
               CppAsString2(RELKIND_MATVIEW) ","
2895
0
               CppAsString2(RELKIND_VIEW) ")"
2896
0
               " AND pg_catalog.has_table_privilege(pg_class.oid, 'SELECT')"
2897
0
               " AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");");
2898
0
}
2899
2900
2901
/*
2902
 * Map SQL table to XML and/or XML Schema document; see SQL/XML:2008
2903
 * section 9.11.
2904
 */
2905
2906
static StringInfo
2907
table_to_xml_internal(Oid relid,
2908
            const char *xmlschema, bool nulls, bool tableforest,
2909
            const char *targetns, bool top_level)
2910
0
{
2911
0
  StringInfoData query;
2912
2913
0
  initStringInfo(&query);
2914
0
  appendStringInfo(&query, "SELECT * FROM %s",
2915
0
           DatumGetCString(DirectFunctionCall1(regclassout,
2916
0
                             ObjectIdGetDatum(relid))));
2917
0
  return query_to_xml_internal(query.data, get_rel_name(relid),
2918
0
                 xmlschema, nulls, tableforest,
2919
0
                 targetns, top_level);
2920
0
}
2921
2922
2923
Datum
2924
table_to_xml(PG_FUNCTION_ARGS)
2925
0
{
2926
0
  Oid     relid = PG_GETARG_OID(0);
2927
0
  bool    nulls = PG_GETARG_BOOL(1);
2928
0
  bool    tableforest = PG_GETARG_BOOL(2);
2929
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2930
2931
0
  PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, NULL,
2932
0
                                nulls, tableforest,
2933
0
                                targetns, true)));
2934
0
}
2935
2936
2937
Datum
2938
query_to_xml(PG_FUNCTION_ARGS)
2939
0
{
2940
0
  char     *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2941
0
  bool    nulls = PG_GETARG_BOOL(1);
2942
0
  bool    tableforest = PG_GETARG_BOOL(2);
2943
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2944
2945
0
  PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2946
0
                                NULL, nulls, tableforest,
2947
0
                                targetns, true)));
2948
0
}
2949
2950
2951
Datum
2952
cursor_to_xml(PG_FUNCTION_ARGS)
2953
0
{
2954
0
  char     *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2955
0
  int32   count = PG_GETARG_INT32(1);
2956
0
  bool    nulls = PG_GETARG_BOOL(2);
2957
0
  bool    tableforest = PG_GETARG_BOOL(3);
2958
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(4));
2959
2960
0
  StringInfoData result;
2961
0
  Portal    portal;
2962
0
  uint64    i;
2963
2964
0
  initStringInfo(&result);
2965
2966
0
  if (!tableforest)
2967
0
  {
2968
0
    xmldata_root_element_start(&result, "table", NULL, targetns, true);
2969
0
    appendStringInfoChar(&result, '\n');
2970
0
  }
2971
2972
0
  SPI_connect();
2973
0
  portal = SPI_cursor_find(name);
2974
0
  if (portal == NULL)
2975
0
    ereport(ERROR,
2976
0
        (errcode(ERRCODE_UNDEFINED_CURSOR),
2977
0
         errmsg("cursor \"%s\" does not exist", name)));
2978
2979
0
  SPI_cursor_fetch(portal, true, count);
2980
0
  for (i = 0; i < SPI_processed; i++)
2981
0
    SPI_sql_row_to_xmlelement(i, &result, NULL, nulls,
2982
0
                  tableforest, targetns, true);
2983
2984
0
  SPI_finish();
2985
2986
0
  if (!tableforest)
2987
0
    xmldata_root_element_end(&result, "table");
2988
2989
0
  PG_RETURN_XML_P(stringinfo_to_xmltype(&result));
2990
0
}
2991
2992
2993
/*
2994
 * Write the start tag of the root element of a data mapping.
2995
 *
2996
 * top_level means that this is the very top level of the eventual
2997
 * output.  For example, when the user calls table_to_xml, then a call
2998
 * with a table name to this function is the top level.  When the user
2999
 * calls database_to_xml, then a call with a schema name to this
3000
 * function is not the top level.  If top_level is false, then the XML
3001
 * namespace declarations are omitted, because they supposedly already
3002
 * appeared earlier in the output.  Repeating them is not wrong, but
3003
 * it looks ugly.
3004
 */
3005
static void
3006
xmldata_root_element_start(StringInfo result, const char *eltname,
3007
               const char *xmlschema, const char *targetns,
3008
               bool top_level)
3009
0
{
3010
  /* This isn't really wrong but currently makes no sense. */
3011
0
  Assert(top_level || !xmlschema);
3012
3013
0
  appendStringInfo(result, "<%s", eltname);
3014
0
  if (top_level)
3015
0
  {
3016
0
    appendStringInfoString(result, " xmlns:xsi=\"" NAMESPACE_XSI "\"");
3017
0
    if (strlen(targetns) > 0)
3018
0
      appendStringInfo(result, " xmlns=\"%s\"", targetns);
3019
0
  }
3020
0
  if (xmlschema)
3021
0
  {
3022
    /* FIXME: better targets */
3023
0
    if (strlen(targetns) > 0)
3024
0
      appendStringInfo(result, " xsi:schemaLocation=\"%s #\"", targetns);
3025
0
    else
3026
0
      appendStringInfoString(result, " xsi:noNamespaceSchemaLocation=\"#\"");
3027
0
  }
3028
0
  appendStringInfoString(result, ">\n");
3029
0
}
3030
3031
3032
static void
3033
xmldata_root_element_end(StringInfo result, const char *eltname)
3034
0
{
3035
0
  appendStringInfo(result, "</%s>\n", eltname);
3036
0
}
3037
3038
3039
static StringInfo
3040
query_to_xml_internal(const char *query, char *tablename,
3041
            const char *xmlschema, bool nulls, bool tableforest,
3042
            const char *targetns, bool top_level)
3043
0
{
3044
0
  StringInfo  result;
3045
0
  char     *xmltn;
3046
0
  uint64    i;
3047
3048
0
  if (tablename)
3049
0
    xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
3050
0
  else
3051
0
    xmltn = "table";
3052
3053
0
  result = makeStringInfo();
3054
3055
0
  SPI_connect();
3056
0
  if (SPI_execute(query, true, 0) != SPI_OK_SELECT)
3057
0
    ereport(ERROR,
3058
0
        (errcode(ERRCODE_DATA_EXCEPTION),
3059
0
         errmsg("invalid query")));
3060
3061
0
  if (!tableforest)
3062
0
  {
3063
0
    xmldata_root_element_start(result, xmltn, xmlschema,
3064
0
                   targetns, top_level);
3065
0
    appendStringInfoChar(result, '\n');
3066
0
  }
3067
3068
0
  if (xmlschema)
3069
0
    appendStringInfo(result, "%s\n\n", xmlschema);
3070
3071
0
  for (i = 0; i < SPI_processed; i++)
3072
0
    SPI_sql_row_to_xmlelement(i, result, tablename, nulls,
3073
0
                  tableforest, targetns, top_level);
3074
3075
0
  if (!tableforest)
3076
0
    xmldata_root_element_end(result, xmltn);
3077
3078
0
  SPI_finish();
3079
3080
0
  return result;
3081
0
}
3082
3083
3084
Datum
3085
table_to_xmlschema(PG_FUNCTION_ARGS)
3086
0
{
3087
0
  Oid     relid = PG_GETARG_OID(0);
3088
0
  bool    nulls = PG_GETARG_BOOL(1);
3089
0
  bool    tableforest = PG_GETARG_BOOL(2);
3090
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3091
0
  const char *result;
3092
0
  Relation  rel;
3093
3094
0
  rel = table_open(relid, AccessShareLock);
3095
0
  result = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
3096
0
                    tableforest, targetns);
3097
0
  table_close(rel, NoLock);
3098
3099
0
  PG_RETURN_XML_P(cstring_to_xmltype(result));
3100
0
}
3101
3102
3103
Datum
3104
query_to_xmlschema(PG_FUNCTION_ARGS)
3105
0
{
3106
0
  char     *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
3107
0
  bool    nulls = PG_GETARG_BOOL(1);
3108
0
  bool    tableforest = PG_GETARG_BOOL(2);
3109
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3110
0
  const char *result;
3111
0
  SPIPlanPtr  plan;
3112
0
  Portal    portal;
3113
3114
0
  SPI_connect();
3115
3116
0
  if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
3117
0
    elog(ERROR, "SPI_prepare(\"%s\") failed", query);
3118
3119
0
  if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
3120
0
    elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
3121
3122
0
  result = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
3123
0
                          InvalidOid, nulls,
3124
0
                          tableforest, targetns));
3125
0
  SPI_cursor_close(portal);
3126
0
  SPI_finish();
3127
3128
0
  PG_RETURN_XML_P(cstring_to_xmltype(result));
3129
0
}
3130
3131
3132
Datum
3133
cursor_to_xmlschema(PG_FUNCTION_ARGS)
3134
0
{
3135
0
  char     *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
3136
0
  bool    nulls = PG_GETARG_BOOL(1);
3137
0
  bool    tableforest = PG_GETARG_BOOL(2);
3138
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3139
0
  const char *xmlschema;
3140
0
  Portal    portal;
3141
3142
0
  SPI_connect();
3143
0
  portal = SPI_cursor_find(name);
3144
0
  if (portal == NULL)
3145
0
    ereport(ERROR,
3146
0
        (errcode(ERRCODE_UNDEFINED_CURSOR),
3147
0
         errmsg("cursor \"%s\" does not exist", name)));
3148
0
  if (portal->tupDesc == NULL)
3149
0
    ereport(ERROR,
3150
0
        (errcode(ERRCODE_INVALID_CURSOR_STATE),
3151
0
         errmsg("portal \"%s\" does not return tuples", name)));
3152
3153
0
  xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
3154
0
                             InvalidOid, nulls,
3155
0
                             tableforest, targetns));
3156
0
  SPI_finish();
3157
3158
0
  PG_RETURN_XML_P(cstring_to_xmltype(xmlschema));
3159
0
}
3160
3161
3162
Datum
3163
table_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3164
0
{
3165
0
  Oid     relid = PG_GETARG_OID(0);
3166
0
  bool    nulls = PG_GETARG_BOOL(1);
3167
0
  bool    tableforest = PG_GETARG_BOOL(2);
3168
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3169
0
  Relation  rel;
3170
0
  const char *xmlschema;
3171
3172
0
  rel = table_open(relid, AccessShareLock);
3173
0
  xmlschema = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
3174
0
                       tableforest, targetns);
3175
0
  table_close(rel, NoLock);
3176
3177
0
  PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid,
3178
0
                                xmlschema, nulls, tableforest,
3179
0
                                targetns, true)));
3180
0
}
3181
3182
3183
Datum
3184
query_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3185
0
{
3186
0
  char     *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
3187
0
  bool    nulls = PG_GETARG_BOOL(1);
3188
0
  bool    tableforest = PG_GETARG_BOOL(2);
3189
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3190
3191
0
  const char *xmlschema;
3192
0
  SPIPlanPtr  plan;
3193
0
  Portal    portal;
3194
3195
0
  SPI_connect();
3196
3197
0
  if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
3198
0
    elog(ERROR, "SPI_prepare(\"%s\") failed", query);
3199
3200
0
  if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
3201
0
    elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
3202
3203
0
  xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
3204
0
                             InvalidOid, nulls, tableforest, targetns));
3205
0
  SPI_cursor_close(portal);
3206
0
  SPI_finish();
3207
3208
0
  PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
3209
0
                                xmlschema, nulls, tableforest,
3210
0
                                targetns, true)));
3211
0
}
3212
3213
3214
/*
3215
 * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2008
3216
 * sections 9.13, 9.14.
3217
 */
3218
3219
static StringInfo
3220
schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls,
3221
             bool tableforest, const char *targetns, bool top_level)
3222
0
{
3223
0
  StringInfo  result;
3224
0
  char     *xmlsn;
3225
0
  List     *relid_list;
3226
0
  ListCell   *cell;
3227
3228
0
  xmlsn = map_sql_identifier_to_xml_name(get_namespace_name(nspid),
3229
0
                       true, false);
3230
0
  result = makeStringInfo();
3231
3232
0
  xmldata_root_element_start(result, xmlsn, xmlschema, targetns, top_level);
3233
0
  appendStringInfoChar(result, '\n');
3234
3235
0
  if (xmlschema)
3236
0
    appendStringInfo(result, "%s\n\n", xmlschema);
3237
3238
0
  SPI_connect();
3239
3240
0
  relid_list = schema_get_xml_visible_tables(nspid);
3241
3242
0
  foreach(cell, relid_list)
3243
0
  {
3244
0
    Oid     relid = lfirst_oid(cell);
3245
0
    StringInfo  subres;
3246
3247
0
    subres = table_to_xml_internal(relid, NULL, nulls, tableforest,
3248
0
                     targetns, false);
3249
3250
0
    appendBinaryStringInfo(result, subres->data, subres->len);
3251
0
    appendStringInfoChar(result, '\n');
3252
0
  }
3253
3254
0
  SPI_finish();
3255
3256
0
  xmldata_root_element_end(result, xmlsn);
3257
3258
0
  return result;
3259
0
}
3260
3261
3262
Datum
3263
schema_to_xml(PG_FUNCTION_ARGS)
3264
0
{
3265
0
  Name    name = PG_GETARG_NAME(0);
3266
0
  bool    nulls = PG_GETARG_BOOL(1);
3267
0
  bool    tableforest = PG_GETARG_BOOL(2);
3268
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3269
3270
0
  char     *schemaname;
3271
0
  Oid     nspid;
3272
3273
0
  schemaname = NameStr(*name);
3274
0
  nspid = LookupExplicitNamespace(schemaname, false);
3275
3276
0
  PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, NULL,
3277
0
                                 nulls, tableforest, targetns, true)));
3278
0
}
3279
3280
3281
/*
3282
 * Write the start element of the root element of an XML Schema mapping.
3283
 */
3284
static void
3285
xsd_schema_element_start(StringInfo result, const char *targetns)
3286
0
{
3287
0
  appendStringInfoString(result,
3288
0
               "<xsd:schema\n"
3289
0
               "    xmlns:xsd=\"" NAMESPACE_XSD "\"");
3290
0
  if (strlen(targetns) > 0)
3291
0
    appendStringInfo(result,
3292
0
             "\n"
3293
0
             "    targetNamespace=\"%s\"\n"
3294
0
             "    elementFormDefault=\"qualified\"",
3295
0
             targetns);
3296
0
  appendStringInfoString(result,
3297
0
               ">\n\n");
3298
0
}
3299
3300
3301
static void
3302
xsd_schema_element_end(StringInfo result)
3303
0
{
3304
0
  appendStringInfoString(result, "</xsd:schema>");
3305
0
}
3306
3307
3308
static StringInfo
3309
schema_to_xmlschema_internal(const char *schemaname, bool nulls,
3310
               bool tableforest, const char *targetns)
3311
0
{
3312
0
  Oid     nspid;
3313
0
  List     *relid_list;
3314
0
  List     *tupdesc_list;
3315
0
  ListCell   *cell;
3316
0
  StringInfo  result;
3317
3318
0
  result = makeStringInfo();
3319
3320
0
  nspid = LookupExplicitNamespace(schemaname, false);
3321
3322
0
  xsd_schema_element_start(result, targetns);
3323
3324
0
  SPI_connect();
3325
3326
0
  relid_list = schema_get_xml_visible_tables(nspid);
3327
3328
0
  tupdesc_list = NIL;
3329
0
  foreach(cell, relid_list)
3330
0
  {
3331
0
    Relation  rel;
3332
3333
0
    rel = table_open(lfirst_oid(cell), AccessShareLock);
3334
0
    tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
3335
0
    table_close(rel, NoLock);
3336
0
  }
3337
3338
0
  appendStringInfoString(result,
3339
0
               map_sql_typecoll_to_xmlschema_types(tupdesc_list));
3340
3341
0
  appendStringInfoString(result,
3342
0
               map_sql_schema_to_xmlschema_types(nspid, relid_list,
3343
0
                               nulls, tableforest, targetns));
3344
3345
0
  xsd_schema_element_end(result);
3346
3347
0
  SPI_finish();
3348
3349
0
  return result;
3350
0
}
3351
3352
3353
Datum
3354
schema_to_xmlschema(PG_FUNCTION_ARGS)
3355
0
{
3356
0
  Name    name = PG_GETARG_NAME(0);
3357
0
  bool    nulls = PG_GETARG_BOOL(1);
3358
0
  bool    tableforest = PG_GETARG_BOOL(2);
3359
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3360
3361
0
  PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xmlschema_internal(NameStr(*name),
3362
0
                                     nulls, tableforest, targetns)));
3363
0
}
3364
3365
3366
Datum
3367
schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3368
0
{
3369
0
  Name    name = PG_GETARG_NAME(0);
3370
0
  bool    nulls = PG_GETARG_BOOL(1);
3371
0
  bool    tableforest = PG_GETARG_BOOL(2);
3372
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3373
0
  char     *schemaname;
3374
0
  Oid     nspid;
3375
0
  StringInfo  xmlschema;
3376
3377
0
  schemaname = NameStr(*name);
3378
0
  nspid = LookupExplicitNamespace(schemaname, false);
3379
3380
0
  xmlschema = schema_to_xmlschema_internal(schemaname, nulls,
3381
0
                       tableforest, targetns);
3382
3383
0
  PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid,
3384
0
                                 xmlschema->data, nulls,
3385
0
                                 tableforest, targetns, true)));
3386
0
}
3387
3388
3389
/*
3390
 * Map SQL database to XML and/or XML Schema document; see SQL/XML:2008
3391
 * sections 9.16, 9.17.
3392
 */
3393
3394
static StringInfo
3395
database_to_xml_internal(const char *xmlschema, bool nulls,
3396
             bool tableforest, const char *targetns)
3397
0
{
3398
0
  StringInfo  result;
3399
0
  List     *nspid_list;
3400
0
  ListCell   *cell;
3401
0
  char     *xmlcn;
3402
3403
0
  xmlcn = map_sql_identifier_to_xml_name(get_database_name(MyDatabaseId),
3404
0
                       true, false);
3405
0
  result = makeStringInfo();
3406
3407
0
  xmldata_root_element_start(result, xmlcn, xmlschema, targetns, true);
3408
0
  appendStringInfoChar(result, '\n');
3409
3410
0
  if (xmlschema)
3411
0
    appendStringInfo(result, "%s\n\n", xmlschema);
3412
3413
0
  SPI_connect();
3414
3415
0
  nspid_list = database_get_xml_visible_schemas();
3416
3417
0
  foreach(cell, nspid_list)
3418
0
  {
3419
0
    Oid     nspid = lfirst_oid(cell);
3420
0
    StringInfo  subres;
3421
3422
0
    subres = schema_to_xml_internal(nspid, NULL, nulls,
3423
0
                    tableforest, targetns, false);
3424
3425
0
    appendBinaryStringInfo(result, subres->data, subres->len);
3426
0
    appendStringInfoChar(result, '\n');
3427
0
  }
3428
3429
0
  SPI_finish();
3430
3431
0
  xmldata_root_element_end(result, xmlcn);
3432
3433
0
  return result;
3434
0
}
3435
3436
3437
Datum
3438
database_to_xml(PG_FUNCTION_ARGS)
3439
0
{
3440
0
  bool    nulls = PG_GETARG_BOOL(0);
3441
0
  bool    tableforest = PG_GETARG_BOOL(1);
3442
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3443
3444
0
  PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(NULL, nulls,
3445
0
                                   tableforest, targetns)));
3446
0
}
3447
3448
3449
static StringInfo
3450
database_to_xmlschema_internal(bool nulls, bool tableforest,
3451
                 const char *targetns)
3452
0
{
3453
0
  List     *relid_list;
3454
0
  List     *nspid_list;
3455
0
  List     *tupdesc_list;
3456
0
  ListCell   *cell;
3457
0
  StringInfo  result;
3458
3459
0
  result = makeStringInfo();
3460
3461
0
  xsd_schema_element_start(result, targetns);
3462
3463
0
  SPI_connect();
3464
3465
0
  relid_list = database_get_xml_visible_tables();
3466
0
  nspid_list = database_get_xml_visible_schemas();
3467
3468
0
  tupdesc_list = NIL;
3469
0
  foreach(cell, relid_list)
3470
0
  {
3471
0
    Relation  rel;
3472
3473
0
    rel = table_open(lfirst_oid(cell), AccessShareLock);
3474
0
    tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
3475
0
    table_close(rel, NoLock);
3476
0
  }
3477
3478
0
  appendStringInfoString(result,
3479
0
               map_sql_typecoll_to_xmlschema_types(tupdesc_list));
3480
3481
0
  appendStringInfoString(result,
3482
0
               map_sql_catalog_to_xmlschema_types(nspid_list, nulls, tableforest, targetns));
3483
3484
0
  xsd_schema_element_end(result);
3485
3486
0
  SPI_finish();
3487
3488
0
  return result;
3489
0
}
3490
3491
3492
Datum
3493
database_to_xmlschema(PG_FUNCTION_ARGS)
3494
0
{
3495
0
  bool    nulls = PG_GETARG_BOOL(0);
3496
0
  bool    tableforest = PG_GETARG_BOOL(1);
3497
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3498
3499
0
  PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xmlschema_internal(nulls,
3500
0
                                     tableforest, targetns)));
3501
0
}
3502
3503
3504
Datum
3505
database_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3506
0
{
3507
0
  bool    nulls = PG_GETARG_BOOL(0);
3508
0
  bool    tableforest = PG_GETARG_BOOL(1);
3509
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3510
0
  StringInfo  xmlschema;
3511
3512
0
  xmlschema = database_to_xmlschema_internal(nulls, tableforest, targetns);
3513
3514
0
  PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(xmlschema->data,
3515
0
                                   nulls, tableforest, targetns)));
3516
0
}
3517
3518
3519
/*
3520
 * Map a multi-part SQL name to an XML name; see SQL/XML:2008 section
3521
 * 9.2.
3522
 */
3523
static char *
3524
map_multipart_sql_identifier_to_xml_name(const char *a, const char *b, const char *c, const char *d)
3525
0
{
3526
0
  StringInfoData result;
3527
3528
0
  initStringInfo(&result);
3529
3530
0
  if (a)
3531
0
    appendStringInfoString(&result,
3532
0
                 map_sql_identifier_to_xml_name(a, true, true));
3533
0
  if (b)
3534
0
    appendStringInfo(&result, ".%s",
3535
0
             map_sql_identifier_to_xml_name(b, true, true));
3536
0
  if (c)
3537
0
    appendStringInfo(&result, ".%s",
3538
0
             map_sql_identifier_to_xml_name(c, true, true));
3539
0
  if (d)
3540
0
    appendStringInfo(&result, ".%s",
3541
0
             map_sql_identifier_to_xml_name(d, true, true));
3542
3543
0
  return result.data;
3544
0
}
3545
3546
3547
/*
3548
 * Map an SQL table to an XML Schema document; see SQL/XML:2008
3549
 * section 9.11.
3550
 *
3551
 * Map an SQL table to XML Schema data types; see SQL/XML:2008 section
3552
 * 9.9.
3553
 */
3554
static const char *
3555
map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, bool nulls,
3556
               bool tableforest, const char *targetns)
3557
0
{
3558
0
  int     i;
3559
0
  char     *xmltn;
3560
0
  char     *tabletypename;
3561
0
  char     *rowtypename;
3562
0
  StringInfoData result;
3563
3564
0
  initStringInfo(&result);
3565
3566
0
  if (OidIsValid(relid))
3567
0
  {
3568
0
    HeapTuple tuple;
3569
0
    Form_pg_class reltuple;
3570
3571
0
    tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
3572
0
    if (!HeapTupleIsValid(tuple))
3573
0
      elog(ERROR, "cache lookup failed for relation %u", relid);
3574
0
    reltuple = (Form_pg_class) GETSTRUCT(tuple);
3575
3576
0
    xmltn = map_sql_identifier_to_xml_name(NameStr(reltuple->relname),
3577
0
                         true, false);
3578
3579
0
    tabletypename = map_multipart_sql_identifier_to_xml_name("TableType",
3580
0
                                 get_database_name(MyDatabaseId),
3581
0
                                 get_namespace_name(reltuple->relnamespace),
3582
0
                                 NameStr(reltuple->relname));
3583
3584
0
    rowtypename = map_multipart_sql_identifier_to_xml_name("RowType",
3585
0
                                 get_database_name(MyDatabaseId),
3586
0
                                 get_namespace_name(reltuple->relnamespace),
3587
0
                                 NameStr(reltuple->relname));
3588
3589
0
    ReleaseSysCache(tuple);
3590
0
  }
3591
0
  else
3592
0
  {
3593
0
    if (tableforest)
3594
0
      xmltn = "row";
3595
0
    else
3596
0
      xmltn = "table";
3597
3598
0
    tabletypename = "TableType";
3599
0
    rowtypename = "RowType";
3600
0
  }
3601
3602
0
  xsd_schema_element_start(&result, targetns);
3603
3604
0
  appendStringInfoString(&result,
3605
0
               map_sql_typecoll_to_xmlschema_types(list_make1(tupdesc)));
3606
3607
0
  appendStringInfo(&result,
3608
0
           "<xsd:complexType name=\"%s\">\n"
3609
0
           "  <xsd:sequence>\n",
3610
0
           rowtypename);
3611
3612
0
  for (i = 0; i < tupdesc->natts; i++)
3613
0
  {
3614
0
    Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3615
3616
0
    if (att->attisdropped)
3617
0
      continue;
3618
0
    appendStringInfo(&result,
3619
0
             "    <xsd:element name=\"%s\" type=\"%s\"%s></xsd:element>\n",
3620
0
             map_sql_identifier_to_xml_name(NameStr(att->attname),
3621
0
                            true, false),
3622
0
             map_sql_type_to_xml_name(att->atttypid, -1),
3623
0
             nulls ? " nillable=\"true\"" : " minOccurs=\"0\"");
3624
0
  }
3625
3626
0
  appendStringInfoString(&result,
3627
0
               "  </xsd:sequence>\n"
3628
0
               "</xsd:complexType>\n\n");
3629
3630
0
  if (!tableforest)
3631
0
  {
3632
0
    appendStringInfo(&result,
3633
0
             "<xsd:complexType name=\"%s\">\n"
3634
0
             "  <xsd:sequence>\n"
3635
0
             "    <xsd:element name=\"row\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n"
3636
0
             "  </xsd:sequence>\n"
3637
0
             "</xsd:complexType>\n\n",
3638
0
             tabletypename, rowtypename);
3639
3640
0
    appendStringInfo(&result,
3641
0
             "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3642
0
             xmltn, tabletypename);
3643
0
  }
3644
0
  else
3645
0
    appendStringInfo(&result,
3646
0
             "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3647
0
             xmltn, rowtypename);
3648
3649
0
  xsd_schema_element_end(&result);
3650
3651
0
  return result.data;
3652
0
}
3653
3654
3655
/*
3656
 * Map an SQL schema to XML Schema data types; see SQL/XML:2008
3657
 * section 9.12.
3658
 */
3659
static const char *
3660
map_sql_schema_to_xmlschema_types(Oid nspid, List *relid_list, bool nulls,
3661
                  bool tableforest, const char *targetns)
3662
0
{
3663
0
  char     *dbname;
3664
0
  char     *nspname;
3665
0
  char     *xmlsn;
3666
0
  char     *schematypename;
3667
0
  StringInfoData result;
3668
0
  ListCell   *cell;
3669
3670
0
  dbname = get_database_name(MyDatabaseId);
3671
0
  nspname = get_namespace_name(nspid);
3672
3673
0
  initStringInfo(&result);
3674
3675
0
  xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3676
3677
0
  schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3678
0
                                dbname,
3679
0
                                nspname,
3680
0
                                NULL);
3681
3682
0
  appendStringInfo(&result,
3683
0
           "<xsd:complexType name=\"%s\">\n", schematypename);
3684
0
  if (!tableforest)
3685
0
    appendStringInfoString(&result,
3686
0
                 "  <xsd:all>\n");
3687
0
  else
3688
0
    appendStringInfoString(&result,
3689
0
                 "  <xsd:sequence>\n");
3690
3691
0
  foreach(cell, relid_list)
3692
0
  {
3693
0
    Oid     relid = lfirst_oid(cell);
3694
0
    char     *relname = get_rel_name(relid);
3695
0
    char     *xmltn = map_sql_identifier_to_xml_name(relname, true, false);
3696
0
    char     *tabletypename = map_multipart_sql_identifier_to_xml_name(tableforest ? "RowType" : "TableType",
3697
0
                                       dbname,
3698
0
                                       nspname,
3699
0
                                       relname);
3700
3701
0
    if (!tableforest)
3702
0
      appendStringInfo(&result,
3703
0
               "    <xsd:element name=\"%s\" type=\"%s\"/>\n",
3704
0
               xmltn, tabletypename);
3705
0
    else
3706
0
      appendStringInfo(&result,
3707
0
               "    <xsd:element name=\"%s\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n",
3708
0
               xmltn, tabletypename);
3709
0
  }
3710
3711
0
  if (!tableforest)
3712
0
    appendStringInfoString(&result,
3713
0
                 "  </xsd:all>\n");
3714
0
  else
3715
0
    appendStringInfoString(&result,
3716
0
                 "  </xsd:sequence>\n");
3717
0
  appendStringInfoString(&result,
3718
0
               "</xsd:complexType>\n\n");
3719
3720
0
  appendStringInfo(&result,
3721
0
           "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3722
0
           xmlsn, schematypename);
3723
3724
0
  return result.data;
3725
0
}
3726
3727
3728
/*
3729
 * Map an SQL catalog to XML Schema data types; see SQL/XML:2008
3730
 * section 9.15.
3731
 */
3732
static const char *
3733
map_sql_catalog_to_xmlschema_types(List *nspid_list, bool nulls,
3734
                   bool tableforest, const char *targetns)
3735
0
{
3736
0
  char     *dbname;
3737
0
  char     *xmlcn;
3738
0
  char     *catalogtypename;
3739
0
  StringInfoData result;
3740
0
  ListCell   *cell;
3741
3742
0
  dbname = get_database_name(MyDatabaseId);
3743
3744
0
  initStringInfo(&result);
3745
3746
0
  xmlcn = map_sql_identifier_to_xml_name(dbname, true, false);
3747
3748
0
  catalogtypename = map_multipart_sql_identifier_to_xml_name("CatalogType",
3749
0
                                 dbname,
3750
0
                                 NULL,
3751
0
                                 NULL);
3752
3753
0
  appendStringInfo(&result,
3754
0
           "<xsd:complexType name=\"%s\">\n", catalogtypename);
3755
0
  appendStringInfoString(&result,
3756
0
               "  <xsd:all>\n");
3757
3758
0
  foreach(cell, nspid_list)
3759
0
  {
3760
0
    Oid     nspid = lfirst_oid(cell);
3761
0
    char     *nspname = get_namespace_name(nspid);
3762
0
    char     *xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3763
0
    char     *schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3764
0
                                        dbname,
3765
0
                                        nspname,
3766
0
                                        NULL);
3767
3768
0
    appendStringInfo(&result,
3769
0
             "    <xsd:element name=\"%s\" type=\"%s\"/>\n",
3770
0
             xmlsn, schematypename);
3771
0
  }
3772
3773
0
  appendStringInfoString(&result,
3774
0
               "  </xsd:all>\n");
3775
0
  appendStringInfoString(&result,
3776
0
               "</xsd:complexType>\n\n");
3777
3778
0
  appendStringInfo(&result,
3779
0
           "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3780
0
           xmlcn, catalogtypename);
3781
3782
0
  return result.data;
3783
0
}
3784
3785
3786
/*
3787
 * Map an SQL data type to an XML name; see SQL/XML:2008 section 9.4.
3788
 */
3789
static const char *
3790
map_sql_type_to_xml_name(Oid typeoid, int typmod)
3791
0
{
3792
0
  StringInfoData result;
3793
3794
0
  initStringInfo(&result);
3795
3796
0
  switch (typeoid)
3797
0
  {
3798
0
    case BPCHAROID:
3799
0
      if (typmod == -1)
3800
0
        appendStringInfoString(&result, "CHAR");
3801
0
      else
3802
0
        appendStringInfo(&result, "CHAR_%d", typmod - VARHDRSZ);
3803
0
      break;
3804
0
    case VARCHAROID:
3805
0
      if (typmod == -1)
3806
0
        appendStringInfoString(&result, "VARCHAR");
3807
0
      else
3808
0
        appendStringInfo(&result, "VARCHAR_%d", typmod - VARHDRSZ);
3809
0
      break;
3810
0
    case NUMERICOID:
3811
0
      if (typmod == -1)
3812
0
        appendStringInfoString(&result, "NUMERIC");
3813
0
      else
3814
0
        appendStringInfo(&result, "NUMERIC_%d_%d",
3815
0
                 ((typmod - VARHDRSZ) >> 16) & 0xffff,
3816
0
                 (typmod - VARHDRSZ) & 0xffff);
3817
0
      break;
3818
0
    case INT4OID:
3819
0
      appendStringInfoString(&result, "INTEGER");
3820
0
      break;
3821
0
    case INT2OID:
3822
0
      appendStringInfoString(&result, "SMALLINT");
3823
0
      break;
3824
0
    case INT8OID:
3825
0
      appendStringInfoString(&result, "BIGINT");
3826
0
      break;
3827
0
    case FLOAT4OID:
3828
0
      appendStringInfoString(&result, "REAL");
3829
0
      break;
3830
0
    case FLOAT8OID:
3831
0
      appendStringInfoString(&result, "DOUBLE");
3832
0
      break;
3833
0
    case BOOLOID:
3834
0
      appendStringInfoString(&result, "BOOLEAN");
3835
0
      break;
3836
0
    case TIMEOID:
3837
0
      if (typmod == -1)
3838
0
        appendStringInfoString(&result, "TIME");
3839
0
      else
3840
0
        appendStringInfo(&result, "TIME_%d", typmod);
3841
0
      break;
3842
0
    case TIMETZOID:
3843
0
      if (typmod == -1)
3844
0
        appendStringInfoString(&result, "TIME_WTZ");
3845
0
      else
3846
0
        appendStringInfo(&result, "TIME_WTZ_%d", typmod);
3847
0
      break;
3848
0
    case TIMESTAMPOID:
3849
0
      if (typmod == -1)
3850
0
        appendStringInfoString(&result, "TIMESTAMP");
3851
0
      else
3852
0
        appendStringInfo(&result, "TIMESTAMP_%d", typmod);
3853
0
      break;
3854
0
    case TIMESTAMPTZOID:
3855
0
      if (typmod == -1)
3856
0
        appendStringInfoString(&result, "TIMESTAMP_WTZ");
3857
0
      else
3858
0
        appendStringInfo(&result, "TIMESTAMP_WTZ_%d", typmod);
3859
0
      break;
3860
0
    case DATEOID:
3861
0
      appendStringInfoString(&result, "DATE");
3862
0
      break;
3863
0
    case XMLOID:
3864
0
      appendStringInfoString(&result, "XML");
3865
0
      break;
3866
0
    default:
3867
0
      {
3868
0
        HeapTuple tuple;
3869
0
        Form_pg_type typtuple;
3870
3871
0
        tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid));
3872
0
        if (!HeapTupleIsValid(tuple))
3873
0
          elog(ERROR, "cache lookup failed for type %u", typeoid);
3874
0
        typtuple = (Form_pg_type) GETSTRUCT(tuple);
3875
3876
0
        appendStringInfoString(&result,
3877
0
                     map_multipart_sql_identifier_to_xml_name((typtuple->typtype == TYPTYPE_DOMAIN) ? "Domain" : "UDT",
3878
0
                                        get_database_name(MyDatabaseId),
3879
0
                                        get_namespace_name(typtuple->typnamespace),
3880
0
                                        NameStr(typtuple->typname)));
3881
3882
0
        ReleaseSysCache(tuple);
3883
0
      }
3884
0
  }
3885
3886
0
  return result.data;
3887
0
}
3888
3889
3890
/*
3891
 * Map a collection of SQL data types to XML Schema data types; see
3892
 * SQL/XML:2008 section 9.7.
3893
 */
3894
static const char *
3895
map_sql_typecoll_to_xmlschema_types(List *tupdesc_list)
3896
0
{
3897
0
  List     *uniquetypes = NIL;
3898
0
  int     i;
3899
0
  StringInfoData result;
3900
0
  ListCell   *cell0;
3901
3902
  /* extract all column types used in the set of TupleDescs */
3903
0
  foreach(cell0, tupdesc_list)
3904
0
  {
3905
0
    TupleDesc tupdesc = (TupleDesc) lfirst(cell0);
3906
3907
0
    for (i = 0; i < tupdesc->natts; i++)
3908
0
    {
3909
0
      Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3910
3911
0
      if (att->attisdropped)
3912
0
        continue;
3913
0
      uniquetypes = list_append_unique_oid(uniquetypes, att->atttypid);
3914
0
    }
3915
0
  }
3916
3917
  /* add base types of domains */
3918
0
  foreach(cell0, uniquetypes)
3919
0
  {
3920
0
    Oid     typid = lfirst_oid(cell0);
3921
0
    Oid     basetypid = getBaseType(typid);
3922
3923
0
    if (basetypid != typid)
3924
0
      uniquetypes = list_append_unique_oid(uniquetypes, basetypid);
3925
0
  }
3926
3927
  /* Convert to textual form */
3928
0
  initStringInfo(&result);
3929
3930
0
  foreach(cell0, uniquetypes)
3931
0
  {
3932
0
    appendStringInfo(&result, "%s\n",
3933
0
             map_sql_type_to_xmlschema_type(lfirst_oid(cell0),
3934
0
                            -1));
3935
0
  }
3936
3937
0
  return result.data;
3938
0
}
3939
3940
3941
/*
3942
 * Map an SQL data type to a named XML Schema data type; see
3943
 * SQL/XML:2008 sections 9.5 and 9.6.
3944
 *
3945
 * (The distinction between 9.5 and 9.6 is basically that 9.6 adds
3946
 * a name attribute, which this function does.  The name-less version
3947
 * 9.5 doesn't appear to be required anywhere.)
3948
 */
3949
static const char *
3950
map_sql_type_to_xmlschema_type(Oid typeoid, int typmod)
3951
0
{
3952
0
  StringInfoData result;
3953
0
  const char *typename = map_sql_type_to_xml_name(typeoid, typmod);
3954
3955
0
  initStringInfo(&result);
3956
3957
0
  if (typeoid == XMLOID)
3958
0
  {
3959
0
    appendStringInfoString(&result,
3960
0
                 "<xsd:complexType mixed=\"true\">\n"
3961
0
                 "  <xsd:sequence>\n"
3962
0
                 "    <xsd:any name=\"element\" minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"skip\"/>\n"
3963
0
                 "  </xsd:sequence>\n"
3964
0
                 "</xsd:complexType>\n");
3965
0
  }
3966
0
  else
3967
0
  {
3968
0
    appendStringInfo(&result,
3969
0
             "<xsd:simpleType name=\"%s\">\n", typename);
3970
3971
0
    switch (typeoid)
3972
0
    {
3973
0
      case BPCHAROID:
3974
0
      case VARCHAROID:
3975
0
      case TEXTOID:
3976
0
        appendStringInfoString(&result,
3977
0
                     "  <xsd:restriction base=\"xsd:string\">\n");
3978
0
        if (typmod != -1)
3979
0
          appendStringInfo(&result,
3980
0
                   "    <xsd:maxLength value=\"%d\"/>\n",
3981
0
                   typmod - VARHDRSZ);
3982
0
        appendStringInfoString(&result, "  </xsd:restriction>\n");
3983
0
        break;
3984
3985
0
      case BYTEAOID:
3986
0
        appendStringInfo(&result,
3987
0
                 "  <xsd:restriction base=\"xsd:%s\">\n"
3988
0
                 "  </xsd:restriction>\n",
3989
0
                 xmlbinary == XMLBINARY_BASE64 ? "base64Binary" : "hexBinary");
3990
0
        break;
3991
3992
0
      case NUMERICOID:
3993
0
        if (typmod != -1)
3994
0
          appendStringInfo(&result,
3995
0
                   "  <xsd:restriction base=\"xsd:decimal\">\n"
3996
0
                   "    <xsd:totalDigits value=\"%d\"/>\n"
3997
0
                   "    <xsd:fractionDigits value=\"%d\"/>\n"
3998
0
                   "  </xsd:restriction>\n",
3999
0
                   ((typmod - VARHDRSZ) >> 16) & 0xffff,
4000
0
                   (typmod - VARHDRSZ) & 0xffff);
4001
0
        break;
4002
4003
0
      case INT2OID:
4004
0
        appendStringInfo(&result,
4005
0
                 "  <xsd:restriction base=\"xsd:short\">\n"
4006
0
                 "    <xsd:maxInclusive value=\"%d\"/>\n"
4007
0
                 "    <xsd:minInclusive value=\"%d\"/>\n"
4008
0
                 "  </xsd:restriction>\n",
4009
0
                 SHRT_MAX, SHRT_MIN);
4010
0
        break;
4011
4012
0
      case INT4OID:
4013
0
        appendStringInfo(&result,
4014
0
                 "  <xsd:restriction base=\"xsd:int\">\n"
4015
0
                 "    <xsd:maxInclusive value=\"%d\"/>\n"
4016
0
                 "    <xsd:minInclusive value=\"%d\"/>\n"
4017
0
                 "  </xsd:restriction>\n",
4018
0
                 INT_MAX, INT_MIN);
4019
0
        break;
4020
4021
0
      case INT8OID:
4022
0
        appendStringInfo(&result,
4023
0
                 "  <xsd:restriction base=\"xsd:long\">\n"
4024
0
                 "    <xsd:maxInclusive value=\"" INT64_FORMAT "\"/>\n"
4025
0
                 "    <xsd:minInclusive value=\"" INT64_FORMAT "\"/>\n"
4026
0
                 "  </xsd:restriction>\n",
4027
0
                 PG_INT64_MAX,
4028
0
                 PG_INT64_MIN);
4029
0
        break;
4030
4031
0
      case FLOAT4OID:
4032
0
        appendStringInfoString(&result,
4033
0
                     "  <xsd:restriction base=\"xsd:float\"></xsd:restriction>\n");
4034
0
        break;
4035
4036
0
      case FLOAT8OID:
4037
0
        appendStringInfoString(&result,
4038
0
                     "  <xsd:restriction base=\"xsd:double\"></xsd:restriction>\n");
4039
0
        break;
4040
4041
0
      case BOOLOID:
4042
0
        appendStringInfoString(&result,
4043
0
                     "  <xsd:restriction base=\"xsd:boolean\"></xsd:restriction>\n");
4044
0
        break;
4045
4046
0
      case TIMEOID:
4047
0
      case TIMETZOID:
4048
0
        {
4049
0
          const char *tz = (typeoid == TIMETZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
4050
4051
0
          if (typmod == -1)
4052
0
            appendStringInfo(&result,
4053
0
                     "  <xsd:restriction base=\"xsd:time\">\n"
4054
0
                     "    <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
4055
0
                     "  </xsd:restriction>\n", tz);
4056
0
          else if (typmod == 0)
4057
0
            appendStringInfo(&result,
4058
0
                     "  <xsd:restriction base=\"xsd:time\">\n"
4059
0
                     "    <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
4060
0
                     "  </xsd:restriction>\n", tz);
4061
0
          else
4062
0
            appendStringInfo(&result,
4063
0
                     "  <xsd:restriction base=\"xsd:time\">\n"
4064
0
                     "    <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
4065
0
                     "  </xsd:restriction>\n", typmod - VARHDRSZ, tz);
4066
0
          break;
4067
0
        }
4068
4069
0
      case TIMESTAMPOID:
4070
0
      case TIMESTAMPTZOID:
4071
0
        {
4072
0
          const char *tz = (typeoid == TIMESTAMPTZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
4073
4074
0
          if (typmod == -1)
4075
0
            appendStringInfo(&result,
4076
0
                     "  <xsd:restriction base=\"xsd:dateTime\">\n"
4077
0
                     "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
4078
0
                     "  </xsd:restriction>\n", tz);
4079
0
          else if (typmod == 0)
4080
0
            appendStringInfo(&result,
4081
0
                     "  <xsd:restriction base=\"xsd:dateTime\">\n"
4082
0
                     "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
4083
0
                     "  </xsd:restriction>\n", tz);
4084
0
          else
4085
0
            appendStringInfo(&result,
4086
0
                     "  <xsd:restriction base=\"xsd:dateTime\">\n"
4087
0
                     "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
4088
0
                     "  </xsd:restriction>\n", typmod - VARHDRSZ, tz);
4089
0
          break;
4090
0
        }
4091
4092
0
      case DATEOID:
4093
0
        appendStringInfoString(&result,
4094
0
                     "  <xsd:restriction base=\"xsd:date\">\n"
4095
0
                     "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}\"/>\n"
4096
0
                     "  </xsd:restriction>\n");
4097
0
        break;
4098
4099
0
      default:
4100
0
        if (get_typtype(typeoid) == TYPTYPE_DOMAIN)
4101
0
        {
4102
0
          Oid     base_typeoid;
4103
0
          int32   base_typmod = -1;
4104
4105
0
          base_typeoid = getBaseTypeAndTypmod(typeoid, &base_typmod);
4106
4107
0
          appendStringInfo(&result,
4108
0
                   "  <xsd:restriction base=\"%s\"/>\n",
4109
0
                   map_sql_type_to_xml_name(base_typeoid, base_typmod));
4110
0
        }
4111
0
        break;
4112
0
    }
4113
0
    appendStringInfoString(&result, "</xsd:simpleType>\n");
4114
0
  }
4115
4116
0
  return result.data;
4117
0
}
4118
4119
4120
/*
4121
 * Map an SQL row to an XML element, taking the row from the active
4122
 * SPI cursor.  See also SQL/XML:2008 section 9.10.
4123
 */
4124
static void
4125
SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, char *tablename,
4126
              bool nulls, bool tableforest,
4127
              const char *targetns, bool top_level)
4128
0
{
4129
0
  int     i;
4130
0
  char     *xmltn;
4131
4132
0
  if (tablename)
4133
0
    xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
4134
0
  else
4135
0
  {
4136
0
    if (tableforest)
4137
0
      xmltn = "row";
4138
0
    else
4139
0
      xmltn = "table";
4140
0
  }
4141
4142
0
  if (tableforest)
4143
0
    xmldata_root_element_start(result, xmltn, NULL, targetns, top_level);
4144
0
  else
4145
0
    appendStringInfoString(result, "<row>\n");
4146
4147
0
  for (i = 1; i <= SPI_tuptable->tupdesc->natts; i++)
4148
0
  {
4149
0
    char     *colname;
4150
0
    Datum   colval;
4151
0
    bool    isnull;
4152
4153
0
    colname = map_sql_identifier_to_xml_name(SPI_fname(SPI_tuptable->tupdesc, i),
4154
0
                         true, false);
4155
0
    colval = SPI_getbinval(SPI_tuptable->vals[rownum],
4156
0
                 SPI_tuptable->tupdesc,
4157
0
                 i,
4158
0
                 &isnull);
4159
0
    if (isnull)
4160
0
    {
4161
0
      if (nulls)
4162
0
        appendStringInfo(result, "  <%s xsi:nil=\"true\"/>\n", colname);
4163
0
    }
4164
0
    else
4165
0
      appendStringInfo(result, "  <%s>%s</%s>\n",
4166
0
               colname,
4167
0
               map_sql_value_to_xml_value(colval,
4168
0
                            SPI_gettypeid(SPI_tuptable->tupdesc, i), true),
4169
0
               colname);
4170
0
  }
4171
4172
0
  if (tableforest)
4173
0
  {
4174
0
    xmldata_root_element_end(result, xmltn);
4175
0
    appendStringInfoChar(result, '\n');
4176
0
  }
4177
0
  else
4178
0
    appendStringInfoString(result, "</row>\n\n");
4179
0
}
4180
4181
4182
/*
4183
 * XPath related functions
4184
 */
4185
4186
#ifdef USE_LIBXML
4187
4188
/*
4189
 * Convert XML node to text.
4190
 *
4191
 * For attribute and text nodes, return the escaped text.  For anything else,
4192
 * dump the whole subtree.
4193
 */
4194
static text *
4195
xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
4196
{
4197
  xmltype    *result = NULL;
4198
4199
  if (cur->type != XML_ATTRIBUTE_NODE && cur->type != XML_TEXT_NODE)
4200
  {
4201
    void    (*volatile nodefree) (xmlNodePtr) = NULL;
4202
    volatile xmlBufferPtr buf = NULL;
4203
    volatile xmlNodePtr cur_copy = NULL;
4204
4205
    PG_TRY();
4206
    {
4207
      int     bytes;
4208
4209
      buf = xmlBufferCreate();
4210
      if (buf == NULL || xmlerrcxt->err_occurred)
4211
        xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4212
              "could not allocate xmlBuffer");
4213
4214
      /*
4215
       * Produce a dump of the node that we can serialize.  xmlNodeDump
4216
       * does that, but the result of that function won't contain
4217
       * namespace definitions from ancestor nodes, so we first do a
4218
       * xmlCopyNode() which duplicates the node along with its required
4219
       * namespace definitions.
4220
       *
4221
       * Some old libxml2 versions such as 2.7.6 produce partially
4222
       * broken XML_DOCUMENT_NODE nodes (unset content field) when
4223
       * copying them.  xmlNodeDump of such a node works fine, but
4224
       * xmlFreeNode crashes; set us up to call xmlFreeDoc instead.
4225
       */
4226
      cur_copy = xmlCopyNode(cur, 1);
4227
      if (cur_copy == NULL || xmlerrcxt->err_occurred)
4228
        xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4229
              "could not copy node");
4230
      nodefree = (cur_copy->type == XML_DOCUMENT_NODE) ?
4231
        (void (*) (xmlNodePtr)) xmlFreeDoc : xmlFreeNode;
4232
4233
      bytes = xmlNodeDump(buf, NULL, cur_copy, 0, 0);
4234
      if (bytes == -1 || xmlerrcxt->err_occurred)
4235
        xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4236
              "could not dump node");
4237
4238
      result = xmlBuffer_to_xmltype(buf);
4239
    }
4240
    PG_FINALLY();
4241
    {
4242
      if (nodefree)
4243
        nodefree(cur_copy);
4244
      if (buf)
4245
        xmlBufferFree(buf);
4246
    }
4247
    PG_END_TRY();
4248
  }
4249
  else
4250
  {
4251
    volatile xmlChar *str = NULL;
4252
4253
    PG_TRY();
4254
    {
4255
      char     *escaped;
4256
4257
      str = xmlXPathCastNodeToString(cur);
4258
      if (str == NULL || xmlerrcxt->err_occurred)
4259
        xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4260
              "could not allocate xmlChar");
4261
4262
      /* Here we rely on XML having the same representation as TEXT */
4263
      escaped = escape_xml((char *) str);
4264
4265
      result = (xmltype *) cstring_to_text(escaped);
4266
      pfree(escaped);
4267
    }
4268
    PG_FINALLY();
4269
    {
4270
      if (str)
4271
        xmlFree((xmlChar *) str);
4272
    }
4273
    PG_END_TRY();
4274
  }
4275
4276
  return result;
4277
}
4278
4279
/*
4280
 * Convert an XML XPath object (the result of evaluating an XPath expression)
4281
 * to an array of xml values, which are appended to astate.  The function
4282
 * result value is the number of elements in the array.
4283
 *
4284
 * If "astate" is NULL then we don't generate the array value, but we still
4285
 * return the number of elements it would have had.
4286
 *
4287
 * Nodesets are converted to an array containing the nodes' textual
4288
 * representations.  Primitive values (float, double, string) are converted
4289
 * to a single-element array containing the value's string representation.
4290
 */
4291
static int
4292
xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
4293
             ArrayBuildState *astate,
4294
             PgXmlErrorContext *xmlerrcxt)
4295
{
4296
  int     result = 0;
4297
  Datum   datum;
4298
  Oid     datumtype;
4299
  char     *result_str;
4300
4301
  switch (xpathobj->type)
4302
  {
4303
    case XPATH_NODESET:
4304
      if (xpathobj->nodesetval != NULL)
4305
      {
4306
        result = xpathobj->nodesetval->nodeNr;
4307
        if (astate != NULL)
4308
        {
4309
          int     i;
4310
4311
          for (i = 0; i < result; i++)
4312
          {
4313
            datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
4314
                                   xmlerrcxt));
4315
            (void) accumArrayResult(astate, datum, false,
4316
                        XMLOID, CurrentMemoryContext);
4317
          }
4318
        }
4319
      }
4320
      return result;
4321
4322
    case XPATH_BOOLEAN:
4323
      if (astate == NULL)
4324
        return 1;
4325
      datum = BoolGetDatum(xpathobj->boolval);
4326
      datumtype = BOOLOID;
4327
      break;
4328
4329
    case XPATH_NUMBER:
4330
      if (astate == NULL)
4331
        return 1;
4332
      datum = Float8GetDatum(xpathobj->floatval);
4333
      datumtype = FLOAT8OID;
4334
      break;
4335
4336
    case XPATH_STRING:
4337
      if (astate == NULL)
4338
        return 1;
4339
      datum = CStringGetDatum((char *) xpathobj->stringval);
4340
      datumtype = CSTRINGOID;
4341
      break;
4342
4343
    default:
4344
      elog(ERROR, "xpath expression result type %d is unsupported",
4345
         xpathobj->type);
4346
      return 0;     /* keep compiler quiet */
4347
  }
4348
4349
  /* Common code for scalar-value cases */
4350
  result_str = map_sql_value_to_xml_value(datum, datumtype, true);
4351
  datum = PointerGetDatum(cstring_to_xmltype(result_str));
4352
  (void) accumArrayResult(astate, datum, false,
4353
              XMLOID, CurrentMemoryContext);
4354
  return 1;
4355
}
4356
4357
4358
/*
4359
 * Common code for xpath() and xmlexists()
4360
 *
4361
 * Evaluate XPath expression and return number of nodes in res_nitems
4362
 * and array of XML values in astate.  Either of those pointers can be
4363
 * NULL if the corresponding result isn't wanted.
4364
 *
4365
 * It is up to the user to ensure that the XML passed is in fact
4366
 * an XML document - XPath doesn't work easily on fragments without
4367
 * a context node being known.
4368
 */
4369
static void
4370
xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
4371
         int *res_nitems, ArrayBuildState *astate)
4372
{
4373
  PgXmlErrorContext *xmlerrcxt;
4374
  volatile xmlParserCtxtPtr ctxt = NULL;
4375
  volatile xmlDocPtr doc = NULL;
4376
  volatile xmlXPathContextPtr xpathctx = NULL;
4377
  volatile xmlXPathCompExprPtr xpathcomp = NULL;
4378
  volatile xmlXPathObjectPtr xpathobj = NULL;
4379
  char     *datastr;
4380
  int32   len;
4381
  int32   xpath_len;
4382
  xmlChar    *string;
4383
  xmlChar    *xpath_expr;
4384
  size_t    xmldecl_len = 0;
4385
  int     i;
4386
  int     ndim;
4387
  Datum    *ns_names_uris;
4388
  bool     *ns_names_uris_nulls;
4389
  int     ns_count;
4390
4391
  /*
4392
   * Namespace mappings are passed as text[].  If an empty array is passed
4393
   * (ndim = 0, "0-dimensional"), then there are no namespace mappings.
4394
   * Else, a 2-dimensional array with length of the second axis being equal
4395
   * to 2 should be passed, i.e., every subarray contains 2 elements, the
4396
   * first element defining the name, the second one the URI.  Example:
4397
   * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
4398
   * 'http://example2.com']].
4399
   */
4400
  ndim = namespaces ? ARR_NDIM(namespaces) : 0;
4401
  if (ndim != 0)
4402
  {
4403
    int      *dims;
4404
4405
    dims = ARR_DIMS(namespaces);
4406
4407
    if (ndim != 2 || dims[1] != 2)
4408
      ereport(ERROR,
4409
          (errcode(ERRCODE_DATA_EXCEPTION),
4410
           errmsg("invalid array for XML namespace mapping"),
4411
           errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));
4412
4413
    Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
4414
4415
    deconstruct_array_builtin(namespaces, TEXTOID,
4416
                  &ns_names_uris, &ns_names_uris_nulls,
4417
                  &ns_count);
4418
4419
    Assert((ns_count % 2) == 0);  /* checked above */
4420
    ns_count /= 2;      /* count pairs only */
4421
  }
4422
  else
4423
  {
4424
    ns_names_uris = NULL;
4425
    ns_names_uris_nulls = NULL;
4426
    ns_count = 0;
4427
  }
4428
4429
  datastr = VARDATA(data);
4430
  len = VARSIZE(data) - VARHDRSZ;
4431
  xpath_len = VARSIZE_ANY_EXHDR(xpath_expr_text);
4432
  if (xpath_len == 0)
4433
    ereport(ERROR,
4434
        (errcode(ERRCODE_INVALID_ARGUMENT_FOR_XQUERY),
4435
         errmsg("empty XPath expression")));
4436
4437
  string = pg_xmlCharStrndup(datastr, len);
4438
  xpath_expr = pg_xmlCharStrndup(VARDATA_ANY(xpath_expr_text), xpath_len);
4439
4440
  /*
4441
   * In a UTF8 database, skip any xml declaration, which might assert
4442
   * another encoding.  Ignore parse_xml_decl() failure, letting
4443
   * xmlCtxtReadMemory() report parse errors.  Documentation disclaims
4444
   * xpath() support for non-ASCII data in non-UTF8 databases, so leave
4445
   * those scenarios bug-compatible with historical behavior.
4446
   */
4447
  if (GetDatabaseEncoding() == PG_UTF8)
4448
    parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL);
4449
4450
  xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4451
4452
  PG_TRY();
4453
  {
4454
    xmlInitParser();
4455
4456
    /*
4457
     * redundant XML parsing (two parsings for the same value during one
4458
     * command execution are possible)
4459
     */
4460
    ctxt = xmlNewParserCtxt();
4461
    if (ctxt == NULL || xmlerrcxt->err_occurred)
4462
      xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4463
            "could not allocate parser context");
4464
    doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
4465
                len - xmldecl_len, NULL, NULL, 0);
4466
    if (doc == NULL || xmlerrcxt->err_occurred)
4467
      xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4468
            "could not parse XML document");
4469
    xpathctx = xmlXPathNewContext(doc);
4470
    if (xpathctx == NULL || xmlerrcxt->err_occurred)
4471
      xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4472
            "could not allocate XPath context");
4473
    xpathctx->node = (xmlNodePtr) doc;
4474
4475
    /* register namespaces, if any */
4476
    if (ns_count > 0)
4477
    {
4478
      for (i = 0; i < ns_count; i++)
4479
      {
4480
        char     *ns_name;
4481
        char     *ns_uri;
4482
4483
        if (ns_names_uris_nulls[i * 2] ||
4484
          ns_names_uris_nulls[i * 2 + 1])
4485
          ereport(ERROR,
4486
              (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
4487
               errmsg("neither namespace name nor URI may be null")));
4488
        ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
4489
        ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
4490
        if (xmlXPathRegisterNs(xpathctx,
4491
                     (xmlChar *) ns_name,
4492
                     (xmlChar *) ns_uri) != 0)
4493
          ereport(ERROR,  /* is this an internal error??? */
4494
              (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
4495
                  ns_name, ns_uri)));
4496
      }
4497
    }
4498
4499
    /*
4500
     * Note: here and elsewhere, be careful to use xmlXPathCtxtCompile not
4501
     * xmlXPathCompile.  In libxml2 2.13.3 and older, the latter function
4502
     * fails to defend itself against recursion-to-stack-overflow.  See
4503
     * https://gitlab.gnome.org/GNOME/libxml2/-/issues/799
4504
     */
4505
    xpathcomp = xmlXPathCtxtCompile(xpathctx, xpath_expr);
4506
    if (xpathcomp == NULL || xmlerrcxt->err_occurred)
4507
      xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4508
            "invalid XPath expression");
4509
4510
    /*
4511
     * Version 2.6.27 introduces a function named
4512
     * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists,
4513
     * but we can derive the existence by whether any nodes are returned,
4514
     * thereby preventing a library version upgrade and keeping the code
4515
     * the same.
4516
     */
4517
    xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
4518
    if (xpathobj == NULL || xmlerrcxt->err_occurred)
4519
      xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4520
            "could not create XPath object");
4521
4522
    /*
4523
     * Extract the results as requested.
4524
     */
4525
    if (res_nitems != NULL)
4526
      *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4527
    else
4528
      (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4529
  }
4530
  PG_CATCH();
4531
  {
4532
    if (xpathobj)
4533
      xmlXPathFreeObject(xpathobj);
4534
    if (xpathcomp)
4535
      xmlXPathFreeCompExpr(xpathcomp);
4536
    if (xpathctx)
4537
      xmlXPathFreeContext(xpathctx);
4538
    if (doc)
4539
      xmlFreeDoc(doc);
4540
    if (ctxt)
4541
      xmlFreeParserCtxt(ctxt);
4542
4543
    pg_xml_done(xmlerrcxt, true);
4544
4545
    PG_RE_THROW();
4546
  }
4547
  PG_END_TRY();
4548
4549
  xmlXPathFreeObject(xpathobj);
4550
  xmlXPathFreeCompExpr(xpathcomp);
4551
  xmlXPathFreeContext(xpathctx);
4552
  xmlFreeDoc(doc);
4553
  xmlFreeParserCtxt(ctxt);
4554
4555
  pg_xml_done(xmlerrcxt, false);
4556
}
4557
#endif              /* USE_LIBXML */
4558
4559
/*
4560
 * Evaluate XPath expression and return array of XML values.
4561
 *
4562
 * As we have no support of XQuery sequences yet, this function seems
4563
 * to be the most useful one (array of XML functions plays a role of
4564
 * some kind of substitution for XQuery sequences).
4565
 */
4566
Datum
4567
xpath(PG_FUNCTION_ARGS)
4568
0
{
4569
#ifdef USE_LIBXML
4570
  text     *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4571
  xmltype    *data = PG_GETARG_XML_P(1);
4572
  ArrayType  *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4573
  ArrayBuildState *astate;
4574
4575
  astate = initArrayResult(XMLOID, CurrentMemoryContext, true);
4576
  xpath_internal(xpath_expr_text, data, namespaces,
4577
           NULL, astate);
4578
  PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
4579
#else
4580
0
  NO_XML_SUPPORT();
4581
0
  return 0;
4582
0
#endif
4583
0
}
4584
4585
/*
4586
 * Determines if the node specified by the supplied XPath exists
4587
 * in a given XML document, returning a boolean.
4588
 */
4589
Datum
4590
xmlexists(PG_FUNCTION_ARGS)
4591
0
{
4592
#ifdef USE_LIBXML
4593
  text     *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4594
  xmltype    *data = PG_GETARG_XML_P(1);
4595
  int     res_nitems;
4596
4597
  xpath_internal(xpath_expr_text, data, NULL,
4598
           &res_nitems, NULL);
4599
4600
  PG_RETURN_BOOL(res_nitems > 0);
4601
#else
4602
0
  NO_XML_SUPPORT();
4603
0
  return 0;
4604
0
#endif
4605
0
}
4606
4607
/*
4608
 * Determines if the node specified by the supplied XPath exists
4609
 * in a given XML document, returning a boolean. Differs from
4610
 * xmlexists as it supports namespaces and is not defined in SQL/XML.
4611
 */
4612
Datum
4613
xpath_exists(PG_FUNCTION_ARGS)
4614
0
{
4615
#ifdef USE_LIBXML
4616
  text     *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4617
  xmltype    *data = PG_GETARG_XML_P(1);
4618
  ArrayType  *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4619
  int     res_nitems;
4620
4621
  xpath_internal(xpath_expr_text, data, namespaces,
4622
           &res_nitems, NULL);
4623
4624
  PG_RETURN_BOOL(res_nitems > 0);
4625
#else
4626
0
  NO_XML_SUPPORT();
4627
0
  return 0;
4628
0
#endif
4629
0
}
4630
4631
/*
4632
 * Functions for checking well-formed-ness
4633
 */
4634
4635
#ifdef USE_LIBXML
4636
static bool
4637
wellformed_xml(text *data, XmlOptionType xmloption_arg)
4638
{
4639
  xmlDocPtr doc;
4640
  ErrorSaveContext escontext = {T_ErrorSaveContext};
4641
4642
  /*
4643
   * We'll report "true" if no soft error is reported by xml_parse().
4644
   */
4645
  doc = xml_parse(data, xmloption_arg, true,
4646
          GetDatabaseEncoding(), NULL, NULL, (Node *) &escontext);
4647
  if (doc)
4648
    xmlFreeDoc(doc);
4649
4650
  return !escontext.error_occurred;
4651
}
4652
#endif
4653
4654
Datum
4655
xml_is_well_formed(PG_FUNCTION_ARGS)
4656
0
{
4657
#ifdef USE_LIBXML
4658
  text     *data = PG_GETARG_TEXT_PP(0);
4659
4660
  PG_RETURN_BOOL(wellformed_xml(data, xmloption));
4661
#else
4662
0
  NO_XML_SUPPORT();
4663
0
  return 0;
4664
0
#endif              /* not USE_LIBXML */
4665
0
}
4666
4667
Datum
4668
xml_is_well_formed_document(PG_FUNCTION_ARGS)
4669
0
{
4670
#ifdef USE_LIBXML
4671
  text     *data = PG_GETARG_TEXT_PP(0);
4672
4673
  PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT));
4674
#else
4675
0
  NO_XML_SUPPORT();
4676
0
  return 0;
4677
0
#endif              /* not USE_LIBXML */
4678
0
}
4679
4680
Datum
4681
xml_is_well_formed_content(PG_FUNCTION_ARGS)
4682
0
{
4683
#ifdef USE_LIBXML
4684
  text     *data = PG_GETARG_TEXT_PP(0);
4685
4686
  PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT));
4687
#else
4688
0
  NO_XML_SUPPORT();
4689
0
  return 0;
4690
0
#endif              /* not USE_LIBXML */
4691
0
}
4692
4693
/*
4694
 * support functions for XMLTABLE
4695
 *
4696
 */
4697
#ifdef USE_LIBXML
4698
4699
/*
4700
 * Returns private data from executor state. Ensure validity by check with
4701
 * MAGIC number.
4702
 */
4703
static inline XmlTableBuilderData *
4704
GetXmlTableBuilderPrivateData(TableFuncScanState *state, const char *fname)
4705
{
4706
  XmlTableBuilderData *result;
4707
4708
  if (!IsA(state, TableFuncScanState))
4709
    elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4710
  result = (XmlTableBuilderData *) state->opaque;
4711
  if (result->magic != XMLTABLE_CONTEXT_MAGIC)
4712
    elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4713
4714
  return result;
4715
}
4716
#endif
4717
4718
/*
4719
 * XmlTableInitOpaque
4720
 *    Fill in TableFuncScanState->opaque for XmlTable processor; initialize
4721
 *    the XML parser.
4722
 *
4723
 * Note: Because we call pg_xml_init() here and pg_xml_done() in
4724
 * XmlTableDestroyOpaque, it is critical for robustness that no other
4725
 * executor nodes run until this node is processed to completion.  Caller
4726
 * must execute this to completion (probably filling a tuplestore to exhaust
4727
 * this node in a single pass) instead of using row-per-call mode.
4728
 */
4729
static void
4730
XmlTableInitOpaque(TableFuncScanState *state, int natts)
4731
0
{
4732
#ifdef USE_LIBXML
4733
  volatile xmlParserCtxtPtr ctxt = NULL;
4734
  XmlTableBuilderData *xtCxt;
4735
  PgXmlErrorContext *xmlerrcxt;
4736
4737
  xtCxt = palloc0(sizeof(XmlTableBuilderData));
4738
  xtCxt->magic = XMLTABLE_CONTEXT_MAGIC;
4739
  xtCxt->natts = natts;
4740
  xtCxt->xpathscomp = palloc0(sizeof(xmlXPathCompExprPtr) * natts);
4741
4742
  xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4743
4744
  PG_TRY();
4745
  {
4746
    xmlInitParser();
4747
4748
    ctxt = xmlNewParserCtxt();
4749
    if (ctxt == NULL || xmlerrcxt->err_occurred)
4750
      xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4751
            "could not allocate parser context");
4752
  }
4753
  PG_CATCH();
4754
  {
4755
    if (ctxt != NULL)
4756
      xmlFreeParserCtxt(ctxt);
4757
4758
    pg_xml_done(xmlerrcxt, true);
4759
4760
    PG_RE_THROW();
4761
  }
4762
  PG_END_TRY();
4763
4764
  xtCxt->xmlerrcxt = xmlerrcxt;
4765
  xtCxt->ctxt = ctxt;
4766
4767
  state->opaque = xtCxt;
4768
#else
4769
0
  NO_XML_SUPPORT();
4770
0
#endif              /* not USE_LIBXML */
4771
0
}
4772
4773
/*
4774
 * XmlTableSetDocument
4775
 *    Install the input document
4776
 */
4777
static void
4778
XmlTableSetDocument(TableFuncScanState *state, Datum value)
4779
0
{
4780
#ifdef USE_LIBXML
4781
  XmlTableBuilderData *xtCxt;
4782
  xmltype    *xmlval = DatumGetXmlP(value);
4783
  char     *str;
4784
  xmlChar    *xstr;
4785
  int     length;
4786
  volatile xmlDocPtr doc = NULL;
4787
  volatile xmlXPathContextPtr xpathcxt = NULL;
4788
4789
  xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetDocument");
4790
4791
  /*
4792
   * Use out function for casting to string (remove encoding property). See
4793
   * comment in xml_out.
4794
   */
4795
  str = xml_out_internal(xmlval, 0);
4796
4797
  length = strlen(str);
4798
  xstr = pg_xmlCharStrndup(str, length);
4799
4800
  PG_TRY();
4801
  {
4802
    doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0);
4803
    if (doc == NULL || xtCxt->xmlerrcxt->err_occurred)
4804
      xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4805
            "could not parse XML document");
4806
    xpathcxt = xmlXPathNewContext(doc);
4807
    if (xpathcxt == NULL || xtCxt->xmlerrcxt->err_occurred)
4808
      xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4809
            "could not allocate XPath context");
4810
    xpathcxt->node = (xmlNodePtr) doc;
4811
  }
4812
  PG_CATCH();
4813
  {
4814
    if (xpathcxt != NULL)
4815
      xmlXPathFreeContext(xpathcxt);
4816
    if (doc != NULL)
4817
      xmlFreeDoc(doc);
4818
4819
    PG_RE_THROW();
4820
  }
4821
  PG_END_TRY();
4822
4823
  xtCxt->doc = doc;
4824
  xtCxt->xpathcxt = xpathcxt;
4825
#else
4826
0
  NO_XML_SUPPORT();
4827
0
#endif              /* not USE_LIBXML */
4828
0
}
4829
4830
/*
4831
 * XmlTableSetNamespace
4832
 *    Add a namespace declaration
4833
 */
4834
static void
4835
XmlTableSetNamespace(TableFuncScanState *state, const char *name, const char *uri)
4836
0
{
4837
#ifdef USE_LIBXML
4838
  XmlTableBuilderData *xtCxt;
4839
4840
  if (name == NULL)
4841
    ereport(ERROR,
4842
        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4843
         errmsg("DEFAULT namespace is not supported")));
4844
  xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace");
4845
4846
  if (xmlXPathRegisterNs(xtCxt->xpathcxt,
4847
               pg_xmlCharStrndup(name, strlen(name)),
4848
               pg_xmlCharStrndup(uri, strlen(uri))))
4849
    xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4850
          "could not set XML namespace");
4851
#else
4852
0
  NO_XML_SUPPORT();
4853
0
#endif              /* not USE_LIBXML */
4854
0
}
4855
4856
/*
4857
 * XmlTableSetRowFilter
4858
 *    Install the row-filter Xpath expression.
4859
 */
4860
static void
4861
XmlTableSetRowFilter(TableFuncScanState *state, const char *path)
4862
0
{
4863
#ifdef USE_LIBXML
4864
  XmlTableBuilderData *xtCxt;
4865
  xmlChar    *xstr;
4866
4867
  xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetRowFilter");
4868
4869
  if (*path == '\0')
4870
    ereport(ERROR,
4871
        (errcode(ERRCODE_INVALID_ARGUMENT_FOR_XQUERY),
4872
         errmsg("row path filter must not be empty string")));
4873
4874
  xstr = pg_xmlCharStrndup(path, strlen(path));
4875
4876
  /* We require XmlTableSetDocument to have been done already */
4877
  Assert(xtCxt->xpathcxt != NULL);
4878
4879
  xtCxt->xpathcomp = xmlXPathCtxtCompile(xtCxt->xpathcxt, xstr);
4880
  if (xtCxt->xpathcomp == NULL || xtCxt->xmlerrcxt->err_occurred)
4881
    xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4882
          "invalid XPath expression");
4883
#else
4884
0
  NO_XML_SUPPORT();
4885
0
#endif              /* not USE_LIBXML */
4886
0
}
4887
4888
/*
4889
 * XmlTableSetColumnFilter
4890
 *    Install the column-filter Xpath expression, for the given column.
4891
 */
4892
static void
4893
XmlTableSetColumnFilter(TableFuncScanState *state, const char *path, int colnum)
4894
0
{
4895
#ifdef USE_LIBXML
4896
  XmlTableBuilderData *xtCxt;
4897
  xmlChar    *xstr;
4898
4899
  Assert(PointerIsValid(path));
4900
4901
  xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetColumnFilter");
4902
4903
  if (*path == '\0')
4904
    ereport(ERROR,
4905
        (errcode(ERRCODE_INVALID_ARGUMENT_FOR_XQUERY),
4906
         errmsg("column path filter must not be empty string")));
4907
4908
  xstr = pg_xmlCharStrndup(path, strlen(path));
4909
4910
  /* We require XmlTableSetDocument to have been done already */
4911
  Assert(xtCxt->xpathcxt != NULL);
4912
4913
  xtCxt->xpathscomp[colnum] = xmlXPathCtxtCompile(xtCxt->xpathcxt, xstr);
4914
  if (xtCxt->xpathscomp[colnum] == NULL || xtCxt->xmlerrcxt->err_occurred)
4915
    xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4916
          "invalid XPath expression");
4917
#else
4918
0
  NO_XML_SUPPORT();
4919
0
#endif              /* not USE_LIBXML */
4920
0
}
4921
4922
/*
4923
 * XmlTableFetchRow
4924
 *    Prepare the next "current" tuple for upcoming GetValue calls.
4925
 *    Returns false if the row-filter expression returned no more rows.
4926
 */
4927
static bool
4928
XmlTableFetchRow(TableFuncScanState *state)
4929
0
{
4930
#ifdef USE_LIBXML
4931
  XmlTableBuilderData *xtCxt;
4932
4933
  xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableFetchRow");
4934
4935
  /* Propagate our own error context to libxml2 */
4936
  xmlSetStructuredErrorFunc(xtCxt->xmlerrcxt, xml_errorHandler);
4937
4938
  if (xtCxt->xpathobj == NULL)
4939
  {
4940
    xtCxt->xpathobj = xmlXPathCompiledEval(xtCxt->xpathcomp, xtCxt->xpathcxt);
4941
    if (xtCxt->xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4942
      xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4943
            "could not create XPath object");
4944
4945
    xtCxt->row_count = 0;
4946
  }
4947
4948
  if (xtCxt->xpathobj->type == XPATH_NODESET)
4949
  {
4950
    if (xtCxt->xpathobj->nodesetval != NULL)
4951
    {
4952
      if (xtCxt->row_count++ < xtCxt->xpathobj->nodesetval->nodeNr)
4953
        return true;
4954
    }
4955
  }
4956
4957
  return false;
4958
#else
4959
0
  NO_XML_SUPPORT();
4960
0
  return false;
4961
0
#endif              /* not USE_LIBXML */
4962
0
}
4963
4964
/*
4965
 * XmlTableGetValue
4966
 *    Return the value for column number 'colnum' for the current row.  If
4967
 *    column -1 is requested, return representation of the whole row.
4968
 *
4969
 * This leaks memory, so be sure to reset often the context in which it's
4970
 * called.
4971
 */
4972
static Datum
4973
XmlTableGetValue(TableFuncScanState *state, int colnum,
4974
         Oid typid, int32 typmod, bool *isnull)
4975
0
{
4976
#ifdef USE_LIBXML
4977
  Datum   result = (Datum) 0;
4978
  XmlTableBuilderData *xtCxt;
4979
  volatile xmlXPathObjectPtr xpathobj = NULL;
4980
4981
  xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableGetValue");
4982
4983
  Assert(xtCxt->xpathobj &&
4984
       xtCxt->xpathobj->type == XPATH_NODESET &&
4985
       xtCxt->xpathobj->nodesetval != NULL);
4986
4987
  /* Propagate our own error context to libxml2 */
4988
  xmlSetStructuredErrorFunc(xtCxt->xmlerrcxt, xml_errorHandler);
4989
4990
  *isnull = false;
4991
4992
  Assert(xtCxt->xpathscomp[colnum] != NULL);
4993
4994
  PG_TRY();
4995
  {
4996
    xmlNodePtr  cur;
4997
    char     *cstr = NULL;
4998
4999
    /* Set current node as entry point for XPath evaluation */
5000
    cur = xtCxt->xpathobj->nodesetval->nodeTab[xtCxt->row_count - 1];
5001
    xtCxt->xpathcxt->node = cur;
5002
5003
    /* Evaluate column path */
5004
    xpathobj = xmlXPathCompiledEval(xtCxt->xpathscomp[colnum], xtCxt->xpathcxt);
5005
    if (xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
5006
      xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
5007
            "could not create XPath object");
5008
5009
    /*
5010
     * There are four possible cases, depending on the number of nodes
5011
     * returned by the XPath expression and the type of the target column:
5012
     * a) XPath returns no nodes.  b) The target type is XML (return all
5013
     * as XML).  For non-XML return types:  c) One node (return content).
5014
     * d) Multiple nodes (error).
5015
     */
5016
    if (xpathobj->type == XPATH_NODESET)
5017
    {
5018
      int     count = 0;
5019
5020
      if (xpathobj->nodesetval != NULL)
5021
        count = xpathobj->nodesetval->nodeNr;
5022
5023
      if (xpathobj->nodesetval == NULL || count == 0)
5024
      {
5025
        *isnull = true;
5026
      }
5027
      else
5028
      {
5029
        if (typid == XMLOID)
5030
        {
5031
          text     *textstr;
5032
          StringInfoData str;
5033
5034
          /* Concatenate serialized values */
5035
          initStringInfo(&str);
5036
          for (int i = 0; i < count; i++)
5037
          {
5038
            textstr =
5039
              xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
5040
                         xtCxt->xmlerrcxt);
5041
5042
            appendStringInfoText(&str, textstr);
5043
          }
5044
          cstr = str.data;
5045
        }
5046
        else
5047
        {
5048
          xmlChar    *str;
5049
5050
          if (count > 1)
5051
            ereport(ERROR,
5052
                (errcode(ERRCODE_CARDINALITY_VIOLATION),
5053
                 errmsg("more than one value returned by column XPath expression")));
5054
5055
          str = xmlXPathCastNodeSetToString(xpathobj->nodesetval);
5056
          cstr = str ? xml_pstrdup_and_free(str) : "";
5057
        }
5058
      }
5059
    }
5060
    else if (xpathobj->type == XPATH_STRING)
5061
    {
5062
      /* Content should be escaped when target will be XML */
5063
      if (typid == XMLOID)
5064
        cstr = escape_xml((char *) xpathobj->stringval);
5065
      else
5066
        cstr = (char *) xpathobj->stringval;
5067
    }
5068
    else if (xpathobj->type == XPATH_BOOLEAN)
5069
    {
5070
      char    typcategory;
5071
      bool    typispreferred;
5072
      xmlChar    *str;
5073
5074
      /* Allow implicit casting from boolean to numbers */
5075
      get_type_category_preferred(typid, &typcategory, &typispreferred);
5076
5077
      if (typcategory != TYPCATEGORY_NUMERIC)
5078
        str = xmlXPathCastBooleanToString(xpathobj->boolval);
5079
      else
5080
        str = xmlXPathCastNumberToString(xmlXPathCastBooleanToNumber(xpathobj->boolval));
5081
5082
      cstr = xml_pstrdup_and_free(str);
5083
    }
5084
    else if (xpathobj->type == XPATH_NUMBER)
5085
    {
5086
      xmlChar    *str;
5087
5088
      str = xmlXPathCastNumberToString(xpathobj->floatval);
5089
      cstr = xml_pstrdup_and_free(str);
5090
    }
5091
    else
5092
      elog(ERROR, "unexpected XPath object type %u", xpathobj->type);
5093
5094
    /*
5095
     * By here, either cstr contains the result value, or the isnull flag
5096
     * has been set.
5097
     */
5098
    Assert(cstr || *isnull);
5099
5100
    if (!*isnull)
5101
      result = InputFunctionCall(&state->in_functions[colnum],
5102
                     cstr,
5103
                     state->typioparams[colnum],
5104
                     typmod);
5105
  }
5106
  PG_FINALLY();
5107
  {
5108
    if (xpathobj != NULL)
5109
      xmlXPathFreeObject(xpathobj);
5110
  }
5111
  PG_END_TRY();
5112
5113
  return result;
5114
#else
5115
0
  NO_XML_SUPPORT();
5116
0
  return 0;
5117
0
#endif              /* not USE_LIBXML */
5118
0
}
5119
5120
/*
5121
 * XmlTableDestroyOpaque
5122
 *    Release all libxml2 resources
5123
 */
5124
static void
5125
XmlTableDestroyOpaque(TableFuncScanState *state)
5126
0
{
5127
#ifdef USE_LIBXML
5128
  XmlTableBuilderData *xtCxt;
5129
5130
  xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableDestroyOpaque");
5131
5132
  /* Propagate our own error context to libxml2 */
5133
  xmlSetStructuredErrorFunc(xtCxt->xmlerrcxt, xml_errorHandler);
5134
5135
  if (xtCxt->xpathscomp != NULL)
5136
  {
5137
    int     i;
5138
5139
    for (i = 0; i < xtCxt->natts; i++)
5140
      if (xtCxt->xpathscomp[i] != NULL)
5141
        xmlXPathFreeCompExpr(xtCxt->xpathscomp[i]);
5142
  }
5143
5144
  if (xtCxt->xpathobj != NULL)
5145
    xmlXPathFreeObject(xtCxt->xpathobj);
5146
  if (xtCxt->xpathcomp != NULL)
5147
    xmlXPathFreeCompExpr(xtCxt->xpathcomp);
5148
  if (xtCxt->xpathcxt != NULL)
5149
    xmlXPathFreeContext(xtCxt->xpathcxt);
5150
  if (xtCxt->doc != NULL)
5151
    xmlFreeDoc(xtCxt->doc);
5152
  if (xtCxt->ctxt != NULL)
5153
    xmlFreeParserCtxt(xtCxt->ctxt);
5154
5155
  pg_xml_done(xtCxt->xmlerrcxt, true);
5156
5157
  /* not valid anymore */
5158
  xtCxt->magic = 0;
5159
  state->opaque = NULL;
5160
5161
#else
5162
0
  NO_XML_SUPPORT();
5163
0
#endif              /* not USE_LIBXML */
5164
0
}