Coverage Report

Created: 2025-06-15 06:31

/src/postgres/src/backend/utils/adt/xml.c
Line
Count
Source (jump to first uncovered line)
1
/*-------------------------------------------------------------------------
2
 *
3
 * xml.c
4
 *    XML data type support.
5
 *
6
 *
7
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8
 * Portions Copyright (c) 1994, Regents of the University of California
9
 *
10
 * src/backend/utils/adt/xml.c
11
 *
12
 *-------------------------------------------------------------------------
13
 */
14
15
/*
16
 * Generally, XML type support is only available when libxml use was
17
 * configured during the build.  But even if that is not done, the
18
 * type and all the functions are available, but most of them will
19
 * fail.  For one thing, this avoids having to manage variant catalog
20
 * installations.  But it also has nice effects such as that you can
21
 * dump a database containing XML type data even if the server is not
22
 * linked with libxml.  Thus, make sure xml_out() works even if nothing
23
 * else does.
24
 */
25
26
/*
27
 * Notes on memory management:
28
 *
29
 * Sometimes libxml allocates global structures in the hope that it can reuse
30
 * them later on.  This makes it impractical to change the xmlMemSetup
31
 * functions on-the-fly; that is likely to lead to trying to pfree() chunks
32
 * allocated with malloc() or vice versa.  Since libxml might be used by
33
 * loadable modules, eg libperl, our only safe choices are to change the
34
 * functions at postmaster/backend launch or not at all.  Since we'd rather
35
 * not activate libxml in sessions that might never use it, the latter choice
36
 * is the preferred one.  However, for debugging purposes it can be awfully
37
 * handy to constrain libxml's allocations to be done in a specific palloc
38
 * context, where they're easy to track.  Therefore there is code here that
39
 * can be enabled in debug builds to redirect libxml's allocations into a
40
 * special context LibxmlContext.  It's not recommended to turn this on in
41
 * a production build because of the possibility of bad interactions with
42
 * external modules.
43
 */
44
/* #define USE_LIBXMLCONTEXT */
45
46
#include "postgres.h"
47
48
#ifdef USE_LIBXML
49
#include <libxml/chvalid.h>
50
#include <libxml/entities.h>
51
#include <libxml/parser.h>
52
#include <libxml/parserInternals.h>
53
#include <libxml/tree.h>
54
#include <libxml/uri.h>
55
#include <libxml/xmlerror.h>
56
#include <libxml/xmlsave.h>
57
#include <libxml/xmlversion.h>
58
#include <libxml/xmlwriter.h>
59
#include <libxml/xpath.h>
60
#include <libxml/xpathInternals.h>
61
62
/*
63
 * We used to check for xmlStructuredErrorContext via a configure test; but
64
 * that doesn't work on Windows, so instead use this grottier method of
65
 * testing the library version number.
66
 */
67
#if LIBXML_VERSION >= 20704
68
#define HAVE_XMLSTRUCTUREDERRORCONTEXT 1
69
#endif
70
71
/*
72
 * libxml2 2.12 decided to insert "const" into the error handler API.
73
 */
74
#if LIBXML_VERSION >= 21200
75
#define PgXmlErrorPtr const xmlError *
76
#else
77
#define PgXmlErrorPtr xmlErrorPtr
78
#endif
79
80
#endif              /* USE_LIBXML */
81
82
#include "access/htup_details.h"
83
#include "access/table.h"
84
#include "catalog/namespace.h"
85
#include "catalog/pg_class.h"
86
#include "catalog/pg_type.h"
87
#include "commands/dbcommands.h"
88
#include "executor/spi.h"
89
#include "executor/tablefunc.h"
90
#include "fmgr.h"
91
#include "lib/stringinfo.h"
92
#include "libpq/pqformat.h"
93
#include "mb/pg_wchar.h"
94
#include "miscadmin.h"
95
#include "nodes/execnodes.h"
96
#include "nodes/miscnodes.h"
97
#include "nodes/nodeFuncs.h"
98
#include "utils/array.h"
99
#include "utils/builtins.h"
100
#include "utils/date.h"
101
#include "utils/datetime.h"
102
#include "utils/lsyscache.h"
103
#include "utils/rel.h"
104
#include "utils/syscache.h"
105
#include "utils/xml.h"
106
107
108
/* GUC variables */
109
int     xmlbinary = XMLBINARY_BASE64;
110
int     xmloption = XMLOPTION_CONTENT;
111
112
#ifdef USE_LIBXML
113
114
/* random number to identify PgXmlErrorContext */
115
#define ERRCXT_MAGIC  68275028
116
117
struct PgXmlErrorContext
118
{
119
  int     magic;
120
  /* strictness argument passed to pg_xml_init */
121
  PgXmlStrictness strictness;
122
  /* current error status and accumulated message, if any */
123
  bool    err_occurred;
124
  StringInfoData err_buf;
125
  /* previous libxml error handling state (saved by pg_xml_init) */
126
  xmlStructuredErrorFunc saved_errfunc;
127
  void     *saved_errcxt;
128
  /* previous libxml entity handler (saved by pg_xml_init) */
129
  xmlExternalEntityLoader saved_entityfunc;
130
};
131
132
static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID,
133
                       xmlParserCtxtPtr ctxt);
134
static void xml_errsave(Node *escontext, PgXmlErrorContext *errcxt,
135
            int sqlcode, const char *msg);
136
static void xml_errorHandler(void *data, PgXmlErrorPtr error);
137
static int  errdetail_for_xml_code(int code);
138
static void chopStringInfoNewlines(StringInfo str);
139
static void appendStringInfoLineSeparator(StringInfo str);
140
141
#ifdef USE_LIBXMLCONTEXT
142
143
static MemoryContext LibxmlContext = NULL;
144
145
static void xml_memory_init(void);
146
static void *xml_palloc(size_t size);
147
static void *xml_repalloc(void *ptr, size_t size);
148
static void xml_pfree(void *ptr);
149
static char *xml_pstrdup(const char *string);
150
#endif              /* USE_LIBXMLCONTEXT */
151
152
static xmlChar *xml_text2xmlChar(text *in);
153
static int  parse_xml_decl(const xmlChar *str, size_t *lenp,
154
               xmlChar **version, xmlChar **encoding, int *standalone);
155
static bool print_xml_decl(StringInfo buf, const xmlChar *version,
156
               pg_enc encoding, int standalone);
157
static bool xml_doctype_in_content(const xmlChar *str);
158
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
159
               bool preserve_whitespace, int encoding,
160
               XmlOptionType *parsed_xmloptiontype,
161
               xmlNodePtr *parsed_nodes,
162
               Node *escontext);
163
static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
164
static int  xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
165
                   ArrayBuildState *astate,
166
                   PgXmlErrorContext *xmlerrcxt);
167
static xmlChar *pg_xmlCharStrndup(const char *str, size_t len);
168
#endif              /* USE_LIBXML */
169
170
static void xmldata_root_element_start(StringInfo result, const char *eltname,
171
                     const char *xmlschema, const char *targetns,
172
                     bool top_level);
173
static void xmldata_root_element_end(StringInfo result, const char *eltname);
174
static StringInfo query_to_xml_internal(const char *query, char *tablename,
175
                    const char *xmlschema, bool nulls, bool tableforest,
176
                    const char *targetns, bool top_level);
177
static const char *map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid,
178
                        bool nulls, bool tableforest, const char *targetns);
179
static const char *map_sql_schema_to_xmlschema_types(Oid nspid,
180
                           List *relid_list, bool nulls,
181
                           bool tableforest, const char *targetns);
182
static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list,
183
                            bool nulls, bool tableforest,
184
                            const char *targetns);
185
static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod);
186
static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list);
187
static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod);
188
static void SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result,
189
                    char *tablename, bool nulls, bool tableforest,
190
                    const char *targetns, bool top_level);
191
192
/* XMLTABLE support */
193
#ifdef USE_LIBXML
194
/* random number to identify XmlTableContext */
195
#define XMLTABLE_CONTEXT_MAGIC  46922182
196
typedef struct XmlTableBuilderData
197
{
198
  int     magic;
199
  int     natts;
200
  long int  row_count;
201
  PgXmlErrorContext *xmlerrcxt;
202
  xmlParserCtxtPtr ctxt;
203
  xmlDocPtr doc;
204
  xmlXPathContextPtr xpathcxt;
205
  xmlXPathCompExprPtr xpathcomp;
206
  xmlXPathObjectPtr xpathobj;
207
  xmlXPathCompExprPtr *xpathscomp;
208
} XmlTableBuilderData;
209
#endif
210
211
static void XmlTableInitOpaque(struct TableFuncScanState *state, int natts);
212
static void XmlTableSetDocument(struct TableFuncScanState *state, Datum value);
213
static void XmlTableSetNamespace(struct TableFuncScanState *state, const char *name,
214
                 const char *uri);
215
static void XmlTableSetRowFilter(struct TableFuncScanState *state, const char *path);
216
static void XmlTableSetColumnFilter(struct TableFuncScanState *state,
217
                  const char *path, int colnum);
218
static bool XmlTableFetchRow(struct TableFuncScanState *state);
219
static Datum XmlTableGetValue(struct TableFuncScanState *state, int colnum,
220
                Oid typid, int32 typmod, bool *isnull);
221
static void XmlTableDestroyOpaque(struct TableFuncScanState *state);
222
223
const TableFuncRoutine XmlTableRoutine =
224
{
225
  .InitOpaque = XmlTableInitOpaque,
226
  .SetDocument = XmlTableSetDocument,
227
  .SetNamespace = XmlTableSetNamespace,
228
  .SetRowFilter = XmlTableSetRowFilter,
229
  .SetColumnFilter = XmlTableSetColumnFilter,
230
  .FetchRow = XmlTableFetchRow,
231
  .GetValue = XmlTableGetValue,
232
  .DestroyOpaque = XmlTableDestroyOpaque
233
};
234
235
/*
 * Report that the requested XML functionality is unavailable because the
 * server was built without libxml.  Expands to one ereport(ERROR) call; the
 * caller supplies the terminating semicolon.
 */
#define NO_XML_SUPPORT() \
    ereport(ERROR, \
            (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
             errmsg("unsupported XML feature"), \
             errdetail("This functionality requires the server to be built with libxml support.")))
240
241
242
/* from SQL/XML:2008 section 4.9 */
243
#define NAMESPACE_XSD "http://www.w3.org/2001/XMLSchema"
244
#define NAMESPACE_XSI "http://www.w3.org/2001/XMLSchema-instance"
245
#define NAMESPACE_SQLXML "http://standards.iso.org/iso/9075/2003/sqlxml"
246
247
248
#ifdef USE_LIBXML
249
250
static int
251
xmlChar_to_encoding(const xmlChar *encoding_name)
252
{
253
  int     encoding = pg_char_to_encoding((const char *) encoding_name);
254
255
  if (encoding < 0)
256
    ereport(ERROR,
257
        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
258
         errmsg("invalid encoding name \"%s\"",
259
            (const char *) encoding_name)));
260
  return encoding;
261
}
262
#endif
263
264
265
/*
266
 * xml_in uses a plain C string to VARDATA conversion, so for the time being
267
 * we use the conversion function for the text datatype.
268
 *
269
 * This is only acceptable so long as xmltype and text use the same
270
 * representation.
271
 */
272
Datum
273
xml_in(PG_FUNCTION_ARGS)
274
0
{
275
#ifdef USE_LIBXML
276
  char     *s = PG_GETARG_CSTRING(0);
277
  xmltype    *vardata;
278
  xmlDocPtr doc;
279
280
  /* Build the result object. */
281
  vardata = (xmltype *) cstring_to_text(s);
282
283
  /*
284
   * Parse the data to check if it is well-formed XML data.
285
   *
286
   * Note: we don't need to worry about whether a soft error is detected.
287
   */
288
  doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding(),
289
          NULL, NULL, fcinfo->context);
290
  if (doc != NULL)
291
    xmlFreeDoc(doc);
292
293
  PG_RETURN_XML_P(vardata);
294
#else
295
0
  NO_XML_SUPPORT();
296
0
  return 0;
297
0
#endif
298
0
}
299
300
301
#define PG_XML_DEFAULT_VERSION "1.0"
302
303
304
/*
305
 * xml_out_internal uses a plain VARDATA to C string conversion, so for the
306
 * time being we use the conversion function for the text datatype.
307
 *
308
 * This is only acceptable so long as xmltype and text use the same
309
 * representation.
310
 */
311
static char *
312
xml_out_internal(xmltype *x, pg_enc target_encoding)
313
0
{
314
0
  char     *str = text_to_cstring((text *) x);
315
316
#ifdef USE_LIBXML
317
  size_t    len = strlen(str);
318
  xmlChar    *version;
319
  int     standalone;
320
  int     res_code;
321
322
  if ((res_code = parse_xml_decl((xmlChar *) str,
323
                   &len, &version, NULL, &standalone)) == 0)
324
  {
325
    StringInfoData buf;
326
327
    initStringInfo(&buf);
328
329
    if (!print_xml_decl(&buf, version, target_encoding, standalone))
330
    {
331
      /*
332
       * If we are not going to produce an XML declaration, eat a single
333
       * newline in the original string to prevent empty first lines in
334
       * the output.
335
       */
336
      if (*(str + len) == '\n')
337
        len += 1;
338
    }
339
    appendStringInfoString(&buf, str + len);
340
341
    pfree(str);
342
343
    return buf.data;
344
  }
345
346
  ereport(WARNING,
347
      errcode(ERRCODE_DATA_CORRUPTED),
348
      errmsg_internal("could not parse XML declaration in stored value"),
349
      errdetail_for_xml_code(res_code));
350
#endif
351
0
  return str;
352
0
}
353
354
355
Datum
356
xml_out(PG_FUNCTION_ARGS)
357
0
{
358
0
  xmltype    *x = PG_GETARG_XML_P(0);
359
360
  /*
361
   * xml_out removes the encoding property in all cases.  This is because we
362
   * cannot control from here whether the datum will be converted to a
363
   * different client encoding, so we'd do more harm than good by including
364
   * it.
365
   */
366
0
  PG_RETURN_CSTRING(xml_out_internal(x, 0));
367
0
}
368
369
370
Datum
371
xml_recv(PG_FUNCTION_ARGS)
372
0
{
373
#ifdef USE_LIBXML
374
  StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
375
  xmltype    *result;
376
  char     *str;
377
  char     *newstr;
378
  int     nbytes;
379
  xmlDocPtr doc;
380
  xmlChar    *encodingStr = NULL;
381
  int     encoding;
382
383
  /*
384
   * Read the data in raw format. We don't know yet what the encoding is, as
385
   * that information is embedded in the xml declaration; so we have to
386
   * parse that before converting to server encoding.
387
   */
388
  nbytes = buf->len - buf->cursor;
389
  str = (char *) pq_getmsgbytes(buf, nbytes);
390
391
  /*
392
   * We need a null-terminated string to pass to parse_xml_decl().  Rather
393
   * than make a separate copy, make the temporary result one byte bigger
394
   * than it needs to be.
395
   */
396
  result = palloc(nbytes + 1 + VARHDRSZ);
397
  SET_VARSIZE(result, nbytes + VARHDRSZ);
398
  memcpy(VARDATA(result), str, nbytes);
399
  str = VARDATA(result);
400
  str[nbytes] = '\0';
401
402
  parse_xml_decl((const xmlChar *) str, NULL, NULL, &encodingStr, NULL);
403
404
  /*
405
   * If encoding wasn't explicitly specified in the XML header, treat it as
406
   * UTF-8, as that's the default in XML. This is different from xml_in(),
407
   * where the input has to go through the normal client to server encoding
408
   * conversion.
409
   */
410
  encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
411
412
  /*
413
   * Parse the data to check if it is well-formed XML data.  Assume that
414
   * xml_parse will throw ERROR if not.
415
   */
416
  doc = xml_parse(result, xmloption, true, encoding, NULL, NULL, NULL);
417
  xmlFreeDoc(doc);
418
419
  /* Now that we know what we're dealing with, convert to server encoding */
420
  newstr = pg_any_to_server(str, nbytes, encoding);
421
422
  if (newstr != str)
423
  {
424
    pfree(result);
425
    result = (xmltype *) cstring_to_text(newstr);
426
    pfree(newstr);
427
  }
428
429
  PG_RETURN_XML_P(result);
430
#else
431
0
  NO_XML_SUPPORT();
432
0
  return 0;
433
0
#endif
434
0
}
435
436
437
Datum
438
xml_send(PG_FUNCTION_ARGS)
439
0
{
440
0
  xmltype    *x = PG_GETARG_XML_P(0);
441
0
  char     *outval;
442
0
  StringInfoData buf;
443
444
  /*
445
   * xml_out_internal doesn't convert the encoding, it just prints the right
446
   * declaration. pq_sendtext will do the conversion.
447
   */
448
0
  outval = xml_out_internal(x, pg_get_client_encoding());
449
450
0
  pq_begintypsend(&buf);
451
0
  pq_sendtext(&buf, outval, strlen(outval));
452
0
  pfree(outval);
453
0
  PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
454
0
}
455
456
457
#ifdef USE_LIBXML
458
static void
459
appendStringInfoText(StringInfo str, const text *t)
460
{
461
  appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
462
}
463
#endif
464
465
466
static xmltype *
467
stringinfo_to_xmltype(StringInfo buf)
468
0
{
469
0
  return (xmltype *) cstring_to_text_with_len(buf->data, buf->len);
470
0
}
471
472
473
static xmltype *
474
cstring_to_xmltype(const char *string)
475
0
{
476
0
  return (xmltype *) cstring_to_text(string);
477
0
}
478
479
480
#ifdef USE_LIBXML
481
static xmltype *
482
xmlBuffer_to_xmltype(xmlBufferPtr buf)
483
{
484
  return (xmltype *) cstring_to_text_with_len((const char *) xmlBufferContent(buf),
485
                        xmlBufferLength(buf));
486
}
487
#endif
488
489
490
Datum
491
xmlcomment(PG_FUNCTION_ARGS)
492
0
{
493
#ifdef USE_LIBXML
494
  text     *arg = PG_GETARG_TEXT_PP(0);
495
  char     *argdata = VARDATA_ANY(arg);
496
  int     len = VARSIZE_ANY_EXHDR(arg);
497
  StringInfoData buf;
498
  int     i;
499
500
  /* check for "--" in string or "-" at the end */
501
  for (i = 1; i < len; i++)
502
  {
503
    if (argdata[i] == '-' && argdata[i - 1] == '-')
504
      ereport(ERROR,
505
          (errcode(ERRCODE_INVALID_XML_COMMENT),
506
           errmsg("invalid XML comment")));
507
  }
508
  if (len > 0 && argdata[len - 1] == '-')
509
    ereport(ERROR,
510
        (errcode(ERRCODE_INVALID_XML_COMMENT),
511
         errmsg("invalid XML comment")));
512
513
  initStringInfo(&buf);
514
  appendStringInfoString(&buf, "<!--");
515
  appendStringInfoText(&buf, arg);
516
  appendStringInfoString(&buf, "-->");
517
518
  PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
519
#else
520
0
  NO_XML_SUPPORT();
521
0
  return 0;
522
0
#endif
523
0
}
524
525
526
Datum
527
xmltext(PG_FUNCTION_ARGS)
528
0
{
529
#ifdef USE_LIBXML
530
  text     *arg = PG_GETARG_TEXT_PP(0);
531
  text     *result;
532
  xmlChar    *xmlbuf = NULL;
533
534
  xmlbuf = xmlEncodeSpecialChars(NULL, xml_text2xmlChar(arg));
535
536
  Assert(xmlbuf);
537
538
  result = cstring_to_text_with_len((const char *) xmlbuf, xmlStrlen(xmlbuf));
539
  xmlFree(xmlbuf);
540
  PG_RETURN_XML_P(result);
541
#else
542
0
  NO_XML_SUPPORT();
543
0
  return 0;
544
0
#endif              /* not USE_LIBXML */
545
0
}
546
547
548
/*
549
 * TODO: xmlconcat needs to merge the notations and unparsed entities
550
 * of the argument values.  Not very important in practice, though.
551
 */
552
xmltype *
553
xmlconcat(List *args)
554
0
{
555
#ifdef USE_LIBXML
556
  int     global_standalone = 1;
557
  xmlChar    *global_version = NULL;
558
  bool    global_version_no_value = false;
559
  StringInfoData buf;
560
  ListCell   *v;
561
562
  initStringInfo(&buf);
563
  foreach(v, args)
564
  {
565
    xmltype    *x = DatumGetXmlP(PointerGetDatum(lfirst(v)));
566
    size_t    len;
567
    xmlChar    *version;
568
    int     standalone;
569
    char     *str;
570
571
    len = VARSIZE(x) - VARHDRSZ;
572
    str = text_to_cstring((text *) x);
573
574
    parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone);
575
576
    if (standalone == 0 && global_standalone == 1)
577
      global_standalone = 0;
578
    if (standalone < 0)
579
      global_standalone = -1;
580
581
    if (!version)
582
      global_version_no_value = true;
583
    else if (!global_version)
584
      global_version = version;
585
    else if (xmlStrcmp(version, global_version) != 0)
586
      global_version_no_value = true;
587
588
    appendStringInfoString(&buf, str + len);
589
    pfree(str);
590
  }
591
592
  if (!global_version_no_value || global_standalone >= 0)
593
  {
594
    StringInfoData buf2;
595
596
    initStringInfo(&buf2);
597
598
    print_xml_decl(&buf2,
599
             (!global_version_no_value) ? global_version : NULL,
600
             0,
601
             global_standalone);
602
603
    appendBinaryStringInfo(&buf2, buf.data, buf.len);
604
    buf = buf2;
605
  }
606
607
  return stringinfo_to_xmltype(&buf);
608
#else
609
0
  NO_XML_SUPPORT();
610
0
  return NULL;
611
0
#endif
612
0
}
613
614
615
/*
616
 * XMLAGG support
617
 */
618
Datum
619
xmlconcat2(PG_FUNCTION_ARGS)
620
0
{
621
0
  if (PG_ARGISNULL(0))
622
0
  {
623
0
    if (PG_ARGISNULL(1))
624
0
      PG_RETURN_NULL();
625
0
    else
626
0
      PG_RETURN_XML_P(PG_GETARG_XML_P(1));
627
0
  }
628
0
  else if (PG_ARGISNULL(1))
629
0
    PG_RETURN_XML_P(PG_GETARG_XML_P(0));
630
0
  else
631
0
    PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0),
632
0
                       PG_GETARG_XML_P(1))));
633
0
}
634
635
636
Datum
637
texttoxml(PG_FUNCTION_ARGS)
638
0
{
639
0
  text     *data = PG_GETARG_TEXT_PP(0);
640
641
0
  PG_RETURN_XML_P(xmlparse(data, xmloption, true));
642
0
}
643
644
645
Datum
646
xmltotext(PG_FUNCTION_ARGS)
647
0
{
648
0
  xmltype    *data = PG_GETARG_XML_P(0);
649
650
  /* It's actually binary compatible. */
651
0
  PG_RETURN_TEXT_P((text *) data);
652
0
}
653
654
655
text *
656
xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
657
0
{
658
#ifdef USE_LIBXML
659
  text     *volatile result;
660
  xmlDocPtr doc;
661
  XmlOptionType parsed_xmloptiontype;
662
  xmlNodePtr  content_nodes;
663
  volatile xmlBufferPtr buf = NULL;
664
  volatile xmlSaveCtxtPtr ctxt = NULL;
665
  ErrorSaveContext escontext = {T_ErrorSaveContext};
666
  PgXmlErrorContext *xmlerrcxt;
667
#endif
668
669
0
  if (xmloption_arg != XMLOPTION_DOCUMENT && !indent)
670
0
  {
671
    /*
672
     * We don't actually need to do anything, so just return the
673
     * binary-compatible input.  For backwards-compatibility reasons,
674
     * allow such cases to succeed even without USE_LIBXML.
675
     */
676
0
    return (text *) data;
677
0
  }
678
679
#ifdef USE_LIBXML
680
681
  /*
682
   * Parse the input according to the xmloption.
683
   *
684
   * preserve_whitespace is set to false in case we are indenting, otherwise
685
   * libxml2 will fail to indent elements that have whitespace between them.
686
   */
687
  doc = xml_parse(data, xmloption_arg, !indent, GetDatabaseEncoding(),
688
          &parsed_xmloptiontype, &content_nodes,
689
          (Node *) &escontext);
690
  if (doc == NULL || escontext.error_occurred)
691
  {
692
    if (doc)
693
      xmlFreeDoc(doc);
694
    /* A soft error must be failure to conform to XMLOPTION_DOCUMENT */
695
    ereport(ERROR,
696
        (errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
697
         errmsg("not an XML document")));
698
  }
699
700
  /* If we weren't asked to indent, we're done. */
701
  if (!indent)
702
  {
703
    xmlFreeDoc(doc);
704
    return (text *) data;
705
  }
706
707
  /* Otherwise, we gotta spin up some error handling. */
708
  xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
709
710
  PG_TRY();
711
  {
712
    size_t    decl_len = 0;
713
714
    /* The serialized data will go into this buffer. */
715
    buf = xmlBufferCreate();
716
717
    if (buf == NULL || xmlerrcxt->err_occurred)
718
      xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
719
            "could not allocate xmlBuffer");
720
721
    /* Detect whether there's an XML declaration */
722
    parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
723
724
    /*
725
     * Emit declaration only if the input had one.  Note: some versions of
726
     * xmlSaveToBuffer leak memory if a non-null encoding argument is
727
     * passed, so don't do that.  We don't want any encoding conversion
728
     * anyway.
729
     */
730
    if (decl_len == 0)
731
      ctxt = xmlSaveToBuffer(buf, NULL,
732
                   XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
733
    else
734
      ctxt = xmlSaveToBuffer(buf, NULL,
735
                   XML_SAVE_FORMAT);
736
737
    if (ctxt == NULL || xmlerrcxt->err_occurred)
738
      xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
739
            "could not allocate xmlSaveCtxt");
740
741
    if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
742
    {
743
      /* If it's a document, saving is easy. */
744
      if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
745
        xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
746
              "could not save document to xmlBuffer");
747
    }
748
    else if (content_nodes != NULL)
749
    {
750
      /*
751
       * Deal with the case where we have non-singly-rooted XML.
752
       * libxml's dump functions don't work well for that without help.
753
       * We build a fake root node that serves as a container for the
754
       * content nodes, and then iterate over the nodes.
755
       */
756
      xmlNodePtr  root;
757
      xmlNodePtr  oldroot;
758
      xmlNodePtr  newline;
759
760
      root = xmlNewNode(NULL, (const xmlChar *) "content-root");
761
      if (root == NULL || xmlerrcxt->err_occurred)
762
        xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
763
              "could not allocate xml node");
764
765
      /*
766
       * This attaches root to doc, so we need not free it separately...
767
       * but instead, we have to free the old root if there was one.
768
       */
769
      oldroot = xmlDocSetRootElement(doc, root);
770
      if (oldroot != NULL)
771
        xmlFreeNode(oldroot);
772
773
      xmlAddChildList(root, content_nodes);
774
775
      /*
776
       * We use this node to insert newlines in the dump.  Note: in at
777
       * least some libxml versions, xmlNewDocText would not attach the
778
       * node to the document even if we passed it.  Therefore, manage
779
       * freeing of this node manually, and pass NULL here to make sure
780
       * there's not a dangling link.
781
       */
782
      newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
783
      if (newline == NULL || xmlerrcxt->err_occurred)
784
        xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
785
              "could not allocate xml node");
786
787
      for (xmlNodePtr node = root->children; node; node = node->next)
788
      {
789
        /* insert newlines between nodes */
790
        if (node->type != XML_TEXT_NODE && node->prev != NULL)
791
        {
792
          if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
793
          {
794
            xmlFreeNode(newline);
795
            xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
796
                  "could not save newline to xmlBuffer");
797
          }
798
        }
799
800
        if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
801
        {
802
          xmlFreeNode(newline);
803
          xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
804
                "could not save content to xmlBuffer");
805
        }
806
      }
807
808
      xmlFreeNode(newline);
809
    }
810
811
    if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
812
    {
813
      ctxt = NULL;    /* don't try to close it again */
814
      xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
815
            "could not close xmlSaveCtxtPtr");
816
    }
817
818
    /*
819
     * xmlDocContentDumpOutput may add a trailing newline, so remove that.
820
     */
821
    if (xmloption_arg == XMLOPTION_DOCUMENT)
822
    {
823
      const char *str = (const char *) xmlBufferContent(buf);
824
      int     len = xmlBufferLength(buf);
825
826
      while (len > 0 && (str[len - 1] == '\n' ||
827
                 str[len - 1] == '\r'))
828
        len--;
829
830
      result = cstring_to_text_with_len(str, len);
831
    }
832
    else
833
      result = (text *) xmlBuffer_to_xmltype(buf);
834
  }
835
  PG_CATCH();
836
  {
837
    if (ctxt)
838
      xmlSaveClose(ctxt);
839
    if (buf)
840
      xmlBufferFree(buf);
841
    if (doc)
842
      xmlFreeDoc(doc);
843
844
    pg_xml_done(xmlerrcxt, true);
845
846
    PG_RE_THROW();
847
  }
848
  PG_END_TRY();
849
850
  xmlBufferFree(buf);
851
  xmlFreeDoc(doc);
852
853
  pg_xml_done(xmlerrcxt, false);
854
855
  return result;
856
#else
857
0
  NO_XML_SUPPORT();
858
0
  return NULL;
859
0
#endif
860
0
}
861
862
863
xmltype *
864
xmlelement(XmlExpr *xexpr,
865
       Datum *named_argvalue, bool *named_argnull,
866
       Datum *argvalue, bool *argnull)
867
0
{
868
#ifdef USE_LIBXML
869
  xmltype    *result;
870
  List     *named_arg_strings;
871
  List     *arg_strings;
872
  int     i;
873
  ListCell   *arg;
874
  ListCell   *narg;
875
  PgXmlErrorContext *xmlerrcxt;
876
  volatile xmlBufferPtr buf = NULL;
877
  volatile xmlTextWriterPtr writer = NULL;
878
879
  /*
880
   * All arguments are already evaluated, and their values are passed in the
881
   * named_argvalue/named_argnull or argvalue/argnull arrays.  This avoids
882
   * issues if one of the arguments involves a call to some other function
883
   * or subsystem that wants to use libxml on its own terms.  We examine the
884
   * original XmlExpr to identify the numbers and types of the arguments.
885
   */
886
  named_arg_strings = NIL;
887
  i = 0;
888
  foreach(arg, xexpr->named_args)
889
  {
890
    Expr     *e = (Expr *) lfirst(arg);
891
    char     *str;
892
893
    if (named_argnull[i])
894
      str = NULL;
895
    else
896
      str = map_sql_value_to_xml_value(named_argvalue[i],
897
                       exprType((Node *) e),
898
                       false);
899
    named_arg_strings = lappend(named_arg_strings, str);
900
    i++;
901
  }
902
903
  arg_strings = NIL;
904
  i = 0;
905
  foreach(arg, xexpr->args)
906
  {
907
    Expr     *e = (Expr *) lfirst(arg);
908
    char     *str;
909
910
    /* here we can just forget NULL elements immediately */
911
    if (!argnull[i])
912
    {
913
      str = map_sql_value_to_xml_value(argvalue[i],
914
                       exprType((Node *) e),
915
                       true);
916
      arg_strings = lappend(arg_strings, str);
917
    }
918
    i++;
919
  }
920
921
  xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
922
923
  PG_TRY();
924
  {
925
    buf = xmlBufferCreate();
926
    if (buf == NULL || xmlerrcxt->err_occurred)
927
      xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
928
            "could not allocate xmlBuffer");
929
    writer = xmlNewTextWriterMemory(buf, 0);
930
    if (writer == NULL || xmlerrcxt->err_occurred)
931
      xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
932
            "could not allocate xmlTextWriter");
933
934
    xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
935
936
    forboth(arg, named_arg_strings, narg, xexpr->arg_names)
937
    {
938
      char     *str = (char *) lfirst(arg);
939
      char     *argname = strVal(lfirst(narg));
940
941
      if (str)
942
        xmlTextWriterWriteAttribute(writer,
943
                      (xmlChar *) argname,
944
                      (xmlChar *) str);
945
    }
946
947
    foreach(arg, arg_strings)
948
    {
949
      char     *str = (char *) lfirst(arg);
950
951
      xmlTextWriterWriteRaw(writer, (xmlChar *) str);
952
    }
953
954
    xmlTextWriterEndElement(writer);
955
956
    /* we MUST do this now to flush data out to the buffer ... */
957
    xmlFreeTextWriter(writer);
958
    writer = NULL;
959
960
    result = xmlBuffer_to_xmltype(buf);
961
  }
962
  PG_CATCH();
963
  {
964
    if (writer)
965
      xmlFreeTextWriter(writer);
966
    if (buf)
967
      xmlBufferFree(buf);
968
969
    pg_xml_done(xmlerrcxt, true);
970
971
    PG_RE_THROW();
972
  }
973
  PG_END_TRY();
974
975
  xmlBufferFree(buf);
976
977
  pg_xml_done(xmlerrcxt, false);
978
979
  return result;
980
#else
981
0
  NO_XML_SUPPORT();
982
0
  return NULL;
983
0
#endif
984
0
}
985
986
987
xmltype *
988
xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
989
0
{
990
#ifdef USE_LIBXML
991
  xmlDocPtr doc;
992
993
  doc = xml_parse(data, xmloption_arg, preserve_whitespace,
994
          GetDatabaseEncoding(), NULL, NULL, NULL);
995
  xmlFreeDoc(doc);
996
997
  return (xmltype *) data;
998
#else
999
0
  NO_XML_SUPPORT();
1000
0
  return NULL;
1001
0
#endif
1002
0
}
1003
1004
1005
xmltype *
1006
xmlpi(const char *target, text *arg, bool arg_is_null, bool *result_is_null)
1007
0
{
1008
#ifdef USE_LIBXML
1009
  xmltype    *result;
1010
  StringInfoData buf;
1011
1012
  if (pg_strcasecmp(target, "xml") == 0)
1013
    ereport(ERROR,
1014
        (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
1015
         errmsg("invalid XML processing instruction"),
1016
         errdetail("XML processing instruction target name cannot be \"%s\".", target)));
1017
1018
  /*
1019
   * Following the SQL standard, the null check comes after the syntax check
1020
   * above.
1021
   */
1022
  *result_is_null = arg_is_null;
1023
  if (*result_is_null)
1024
    return NULL;
1025
1026
  initStringInfo(&buf);
1027
1028
  appendStringInfo(&buf, "<?%s", target);
1029
1030
  if (arg != NULL)
1031
  {
1032
    char     *string;
1033
1034
    string = text_to_cstring(arg);
1035
    if (strstr(string, "?>") != NULL)
1036
      ereport(ERROR,
1037
          (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
1038
           errmsg("invalid XML processing instruction"),
1039
           errdetail("XML processing instruction cannot contain \"?>\".")));
1040
1041
    appendStringInfoChar(&buf, ' ');
1042
    appendStringInfoString(&buf, string + strspn(string, " "));
1043
    pfree(string);
1044
  }
1045
  appendStringInfoString(&buf, "?>");
1046
1047
  result = stringinfo_to_xmltype(&buf);
1048
  pfree(buf.data);
1049
  return result;
1050
#else
1051
0
  NO_XML_SUPPORT();
1052
0
  return NULL;
1053
0
#endif
1054
0
}
1055
1056
1057
xmltype *
1058
xmlroot(xmltype *data, text *version, int standalone)
1059
0
{
1060
#ifdef USE_LIBXML
1061
  char     *str;
1062
  size_t    len;
1063
  xmlChar    *orig_version;
1064
  int     orig_standalone;
1065
  StringInfoData buf;
1066
1067
  len = VARSIZE(data) - VARHDRSZ;
1068
  str = text_to_cstring((text *) data);
1069
1070
  parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone);
1071
1072
  if (version)
1073
    orig_version = xml_text2xmlChar(version);
1074
  else
1075
    orig_version = NULL;
1076
1077
  switch (standalone)
1078
  {
1079
    case XML_STANDALONE_YES:
1080
      orig_standalone = 1;
1081
      break;
1082
    case XML_STANDALONE_NO:
1083
      orig_standalone = 0;
1084
      break;
1085
    case XML_STANDALONE_NO_VALUE:
1086
      orig_standalone = -1;
1087
      break;
1088
    case XML_STANDALONE_OMITTED:
1089
      /* leave original value */
1090
      break;
1091
  }
1092
1093
  initStringInfo(&buf);
1094
  print_xml_decl(&buf, orig_version, 0, orig_standalone);
1095
  appendStringInfoString(&buf, str + len);
1096
1097
  return stringinfo_to_xmltype(&buf);
1098
#else
1099
0
  NO_XML_SUPPORT();
1100
0
  return NULL;
1101
0
#endif
1102
0
}
1103
1104
1105
/*
1106
 * Validate document (given as string) against DTD (given as external link)
1107
 *
1108
 * This has been removed because it is a security hole: unprivileged users
1109
 * should not be able to use Postgres to fetch arbitrary external files,
1110
 * which unfortunately is exactly what libxml is willing to do with the DTD
1111
 * parameter.
1112
 */
1113
Datum
1114
xmlvalidate(PG_FUNCTION_ARGS)
1115
0
{
1116
0
  ereport(ERROR,
1117
0
      (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1118
0
       errmsg("xmlvalidate is not implemented")));
1119
0
  return 0;
1120
0
}
1121
1122
1123
bool
1124
xml_is_document(xmltype *arg)
1125
0
{
1126
#ifdef USE_LIBXML
1127
  xmlDocPtr doc;
1128
  ErrorSaveContext escontext = {T_ErrorSaveContext};
1129
1130
  /*
1131
   * We'll report "true" if no soft error is reported by xml_parse().
1132
   */
1133
  doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
1134
          GetDatabaseEncoding(), NULL, NULL, (Node *) &escontext);
1135
  if (doc)
1136
    xmlFreeDoc(doc);
1137
1138
  return !escontext.error_occurred;
1139
#else             /* not USE_LIBXML */
1140
0
  NO_XML_SUPPORT();
1141
0
  return false;
1142
0
#endif              /* not USE_LIBXML */
1143
0
}
1144
1145
1146
#ifdef USE_LIBXML
1147
1148
/*
1149
 * pg_xml_init_library --- set up for use of libxml
1150
 *
1151
 * This should be called by each function that is about to use libxml
1152
 * facilities but doesn't require error handling.  It initializes libxml
1153
 * and verifies compatibility with the loaded libxml version.  These are
1154
 * once-per-session activities.
1155
 *
1156
 * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and
1157
 * check)
1158
 */
1159
void
1160
pg_xml_init_library(void)
1161
{
1162
  static bool first_time = true;
1163
1164
  if (first_time)
1165
  {
1166
    /* Stuff we need do only once per session */
1167
1168
    /*
1169
     * Currently, we have no pure UTF-8 support for internals -- check if
1170
     * we can work.
1171
     */
1172
    if (sizeof(char) != sizeof(xmlChar))
1173
      ereport(ERROR,
1174
          (errmsg("could not initialize XML library"),
1175
           errdetail("libxml2 has incompatible char type: sizeof(char)=%zu, sizeof(xmlChar)=%zu.",
1176
                 sizeof(char), sizeof(xmlChar))));
1177
1178
#ifdef USE_LIBXMLCONTEXT
1179
    /* Set up libxml's memory allocation our way */
1180
    xml_memory_init();
1181
#endif
1182
1183
    /* Check library compatibility */
1184
    LIBXML_TEST_VERSION;
1185
1186
    first_time = false;
1187
  }
1188
}
1189
1190
/*
1191
 * pg_xml_init --- set up for use of libxml and register an error handler
1192
 *
1193
 * This should be called by each function that is about to use libxml
1194
 * facilities and requires error handling.  It initializes libxml with
1195
 * pg_xml_init_library() and establishes our libxml error handler.
1196
 *
1197
 * strictness determines which errors are reported and which are ignored.
1198
 *
1199
 * Calls to this function MUST be followed by a PG_TRY block that guarantees
1200
 * that pg_xml_done() is called during either normal or error exit.
1201
 *
1202
 * This is exported for use by contrib/xml2, as well as other code that might
1203
 * wish to share use of this module's libxml error handler.
1204
 */
1205
PgXmlErrorContext *
1206
pg_xml_init(PgXmlStrictness strictness)
1207
{
1208
  PgXmlErrorContext *errcxt;
1209
  void     *new_errcxt;
1210
1211
  /* Do one-time setup if needed */
1212
  pg_xml_init_library();
1213
1214
  /* Create error handling context structure */
1215
  errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext));
1216
  errcxt->magic = ERRCXT_MAGIC;
1217
  errcxt->strictness = strictness;
1218
  errcxt->err_occurred = false;
1219
  initStringInfo(&errcxt->err_buf);
1220
1221
  /*
1222
   * Save original error handler and install ours. libxml originally didn't
1223
   * distinguish between the contexts for generic and for structured error
1224
   * handlers.  If we're using an old libxml version, we must thus save the
1225
   * generic error context, even though we're using a structured error
1226
   * handler.
1227
   */
1228
  errcxt->saved_errfunc = xmlStructuredError;
1229
1230
#ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1231
  errcxt->saved_errcxt = xmlStructuredErrorContext;
1232
#else
1233
  errcxt->saved_errcxt = xmlGenericErrorContext;
1234
#endif
1235
1236
  xmlSetStructuredErrorFunc(errcxt, xml_errorHandler);
1237
1238
  /*
1239
   * Verify that xmlSetStructuredErrorFunc set the context variable we
1240
   * expected it to.  If not, the error context pointer we just saved is not
1241
   * the correct thing to restore, and since that leaves us without a way to
1242
   * restore the context in pg_xml_done, we must fail.
1243
   *
1244
   * The only known situation in which this test fails is if we compile with
1245
   * headers from a libxml2 that doesn't track the structured error context
1246
   * separately (< 2.7.4), but at runtime use a version that does, or vice
1247
   * versa.  The libxml2 authors did not treat that change as constituting
1248
   * an ABI break, so the LIBXML_TEST_VERSION test in pg_xml_init_library
1249
   * fails to protect us from this.
1250
   */
1251
1252
#ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1253
  new_errcxt = xmlStructuredErrorContext;
1254
#else
1255
  new_errcxt = xmlGenericErrorContext;
1256
#endif
1257
1258
  if (new_errcxt != errcxt)
1259
    ereport(ERROR,
1260
        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1261
         errmsg("could not set up XML error handler"),
1262
         errhint("This probably indicates that the version of libxml2"
1263
             " being used is not compatible with the libxml2"
1264
             " header files that PostgreSQL was built with.")));
1265
1266
  /*
1267
   * Also, install an entity loader to prevent unwanted fetches of external
1268
   * files and URLs.
1269
   */
1270
  errcxt->saved_entityfunc = xmlGetExternalEntityLoader();
1271
  xmlSetExternalEntityLoader(xmlPgEntityLoader);
1272
1273
  return errcxt;
1274
}
1275
1276
1277
/*
1278
 * pg_xml_done --- restore previous libxml error handling
1279
 *
1280
 * Resets libxml's global error-handling state to what it was before
1281
 * pg_xml_init() was called.
1282
 *
1283
 * This routine verifies that all pending errors have been dealt with
1284
 * (in assert-enabled builds, anyway).
1285
 */
1286
void
1287
pg_xml_done(PgXmlErrorContext *errcxt, bool isError)
1288
{
1289
  void     *cur_errcxt;
1290
1291
  /* An assert seems like enough protection here */
1292
  Assert(errcxt->magic == ERRCXT_MAGIC);
1293
1294
  /*
1295
   * In a normal exit, there should be no un-handled libxml errors.  But we
1296
   * shouldn't try to enforce this during error recovery, since the longjmp
1297
   * could have been thrown before xml_ereport had a chance to run.
1298
   */
1299
  Assert(!errcxt->err_occurred || isError);
1300
1301
  /*
1302
   * Check that libxml's global state is correct, warn if not.  This is a
1303
   * real test and not an Assert because it has a higher probability of
1304
   * happening.
1305
   */
1306
#ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1307
  cur_errcxt = xmlStructuredErrorContext;
1308
#else
1309
  cur_errcxt = xmlGenericErrorContext;
1310
#endif
1311
1312
  if (cur_errcxt != errcxt)
1313
    elog(WARNING, "libxml error handling state is out of sync with xml.c");
1314
1315
  /* Restore the saved handlers */
1316
  xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc);
1317
  xmlSetExternalEntityLoader(errcxt->saved_entityfunc);
1318
1319
  /*
1320
   * Mark the struct as invalid, just in case somebody somehow manages to
1321
   * call xml_errorHandler or xml_ereport with it.
1322
   */
1323
  errcxt->magic = 0;
1324
1325
  /* Release memory */
1326
  pfree(errcxt->err_buf.data);
1327
  pfree(errcxt);
1328
}
1329
1330
1331
/*
1332
 * pg_xml_error_occurred() --- test the error flag
1333
 */
1334
bool
1335
pg_xml_error_occurred(PgXmlErrorContext *errcxt)
1336
{
1337
  return errcxt->err_occurred;
1338
}
1339
1340
1341
/*
1342
 * SQL/XML allows storing "XML documents" or "XML content".  "XML
1343
 * documents" are specified by the XML specification and are parsed
1344
 * easily by libxml.  "XML content" is specified by SQL/XML as the
1345
 * production "XMLDecl? content".  But libxml can only parse the
1346
 * "content" part, so we have to parse the XML declaration ourselves
1347
 * to complete this.
1348
 */
1349
1350
/*
 * Require an XML blank at *p; otherwise make the *enclosing function*
 * return XML_ERR_SPACE_REQUIRED.  Does not advance p.
 */
#define CHECK_XML_SPACE(p) \
	do { \
		if (!xmlIsBlank_ch(*(p))) \
			return XML_ERR_SPACE_REQUIRED; \
	} while (0)

/*
 * Advance p past any run of XML blanks.  Wrapped in do/while(0) so that it
 * behaves as a single statement wherever it is used.
 */
#define SKIP_XML_SPACE(p) \
	do { \
		while (xmlIsBlank_ch(*(p))) \
			(p)++; \
	} while (0)

/* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
/*
 * Beware of multiple evaluations of argument!  The argument is fully
 * parenthesized in the comparisons, so an expression argument is compared
 * as a whole.
 */
#define PG_XMLISNAMECHAR(c) \
	(xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \
			|| xmlIsDigit_ch(c) \
			|| (c) == '.' || (c) == '-' || (c) == '_' || (c) == ':' \
			|| xmlIsCombiningQ(c) \
			|| xmlIsExtender_ch(c))
1367
1368
/* pnstrdup, but deal with xmlChar not char; len is measured in xmlChars */
1369
static xmlChar *
1370
xml_pnstrdup(const xmlChar *str, size_t len)
1371
{
1372
  xmlChar    *result;
1373
1374
  result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1375
  memcpy(result, str, len * sizeof(xmlChar));
1376
  result[len] = 0;
1377
  return result;
1378
}
1379
1380
/* Ditto, except input is char* */
1381
static xmlChar *
1382
pg_xmlCharStrndup(const char *str, size_t len)
1383
{
1384
  xmlChar    *result;
1385
1386
  result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1387
  memcpy(result, str, len);
1388
  result[len] = '\0';
1389
1390
  return result;
1391
}
1392
1393
/*
1394
 * Copy xmlChar string to PostgreSQL-owned memory, freeing the input.
1395
 *
1396
 * The input xmlChar is freed regardless of success of the copy.
1397
 */
1398
static char *
1399
xml_pstrdup_and_free(xmlChar *str)
1400
{
1401
  char     *result;
1402
1403
  if (str)
1404
  {
1405
    PG_TRY();
1406
    {
1407
      result = pstrdup((char *) str);
1408
    }
1409
    PG_FINALLY();
1410
    {
1411
      xmlFree(str);
1412
    }
1413
    PG_END_TRY();
1414
  }
1415
  else
1416
    result = NULL;
1417
1418
  return result;
1419
}
1420
1421
/*
1422
 * str is the null-terminated input string.  Remaining arguments are
1423
 * output arguments; each can be NULL if value is not wanted.
1424
 * version and encoding are returned as locally-palloc'd strings.
1425
 * Result is 0 if OK, an error code if not.
1426
 */
1427
static int
1428
parse_xml_decl(const xmlChar *str, size_t *lenp,
1429
         xmlChar **version, xmlChar **encoding, int *standalone)
1430
{
1431
  const xmlChar *p;
1432
  const xmlChar *save_p;
1433
  size_t    len;
1434
  int     utf8char;
1435
  int     utf8len;
1436
1437
  /*
1438
   * Only initialize libxml.  We don't need error handling here, but we do
1439
   * need to make sure libxml is initialized before calling any of its
1440
   * functions.  Note that this is safe (and a no-op) if caller has already
1441
   * done pg_xml_init().
1442
   */
1443
  pg_xml_init_library();
1444
1445
  /* Initialize output arguments to "not present" */
1446
  if (version)
1447
    *version = NULL;
1448
  if (encoding)
1449
    *encoding = NULL;
1450
  if (standalone)
1451
    *standalone = -1;
1452
1453
  p = str;
1454
1455
  if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0)
1456
    goto finished;
1457
1458
  /*
1459
   * If next char is a name char, it's a PI like <?xml-stylesheet ...?>
1460
   * rather than an XMLDecl, so we have done what we came to do and found no
1461
   * XMLDecl.
1462
   *
1463
   * We need an input length value for xmlGetUTF8Char, but there's no need
1464
   * to count the whole document size, so use strnlen not strlen.
1465
   */
1466
  utf8len = strnlen((const char *) (p + 5), MAX_MULTIBYTE_CHAR_LEN);
1467
  utf8char = xmlGetUTF8Char(p + 5, &utf8len);
1468
  if (PG_XMLISNAMECHAR(utf8char))
1469
    goto finished;
1470
1471
  p += 5;
1472
1473
  /* version */
1474
  CHECK_XML_SPACE(p);
1475
  SKIP_XML_SPACE(p);
1476
  if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0)
1477
    return XML_ERR_VERSION_MISSING;
1478
  p += 7;
1479
  SKIP_XML_SPACE(p);
1480
  if (*p != '=')
1481
    return XML_ERR_VERSION_MISSING;
1482
  p += 1;
1483
  SKIP_XML_SPACE(p);
1484
1485
  if (*p == '\'' || *p == '"')
1486
  {
1487
    const xmlChar *q;
1488
1489
    q = xmlStrchr(p + 1, *p);
1490
    if (!q)
1491
      return XML_ERR_VERSION_MISSING;
1492
1493
    if (version)
1494
      *version = xml_pnstrdup(p + 1, q - p - 1);
1495
    p = q + 1;
1496
  }
1497
  else
1498
    return XML_ERR_VERSION_MISSING;
1499
1500
  /* encoding */
1501
  save_p = p;
1502
  SKIP_XML_SPACE(p);
1503
  if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0)
1504
  {
1505
    CHECK_XML_SPACE(save_p);
1506
    p += 8;
1507
    SKIP_XML_SPACE(p);
1508
    if (*p != '=')
1509
      return XML_ERR_MISSING_ENCODING;
1510
    p += 1;
1511
    SKIP_XML_SPACE(p);
1512
1513
    if (*p == '\'' || *p == '"')
1514
    {
1515
      const xmlChar *q;
1516
1517
      q = xmlStrchr(p + 1, *p);
1518
      if (!q)
1519
        return XML_ERR_MISSING_ENCODING;
1520
1521
      if (encoding)
1522
        *encoding = xml_pnstrdup(p + 1, q - p - 1);
1523
      p = q + 1;
1524
    }
1525
    else
1526
      return XML_ERR_MISSING_ENCODING;
1527
  }
1528
  else
1529
  {
1530
    p = save_p;
1531
  }
1532
1533
  /* standalone */
1534
  save_p = p;
1535
  SKIP_XML_SPACE(p);
1536
  if (xmlStrncmp(p, (xmlChar *) "standalone", 10) == 0)
1537
  {
1538
    CHECK_XML_SPACE(save_p);
1539
    p += 10;
1540
    SKIP_XML_SPACE(p);
1541
    if (*p != '=')
1542
      return XML_ERR_STANDALONE_VALUE;
1543
    p += 1;
1544
    SKIP_XML_SPACE(p);
1545
    if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 ||
1546
      xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0)
1547
    {
1548
      if (standalone)
1549
        *standalone = 1;
1550
      p += 5;
1551
    }
1552
    else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 ||
1553
         xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0)
1554
    {
1555
      if (standalone)
1556
        *standalone = 0;
1557
      p += 4;
1558
    }
1559
    else
1560
      return XML_ERR_STANDALONE_VALUE;
1561
  }
1562
  else
1563
  {
1564
    p = save_p;
1565
  }
1566
1567
  SKIP_XML_SPACE(p);
1568
  if (xmlStrncmp(p, (xmlChar *) "?>", 2) != 0)
1569
    return XML_ERR_XMLDECL_NOT_FINISHED;
1570
  p += 2;
1571
1572
finished:
1573
  len = p - str;
1574
1575
  for (p = str; p < str + len; p++)
1576
    if (*p > 127)
1577
      return XML_ERR_INVALID_CHAR;
1578
1579
  if (lenp)
1580
    *lenp = len;
1581
1582
  return XML_ERR_OK;
1583
}
1584
1585
1586
/*
1587
 * Write an XML declaration.  On output, we adjust the XML declaration
1588
 * as follows.  (These rules are the moral equivalent of the clause
1589
 * "Serialization of an XML value" in the SQL standard.)
1590
 *
1591
 * We try to avoid generating an XML declaration if possible.  This is
1592
 * so that you don't get trivial things like xml '<foo/>' resulting in
1593
 * '<?xml version="1.0"?><foo/>', which would surely be annoying.  We
1594
 * must provide a declaration if the standalone property is specified
1595
 * or if we include an encoding declaration.  If we have a
1596
 * declaration, we must specify a version (XML requires this).
1597
 * Otherwise we only make a declaration if the version is not "1.0",
1598
 * which is the default version specified in SQL:2003.
1599
 */
1600
static bool
1601
print_xml_decl(StringInfo buf, const xmlChar *version,
1602
         pg_enc encoding, int standalone)
1603
{
1604
  if ((version && strcmp((const char *) version, PG_XML_DEFAULT_VERSION) != 0)
1605
    || (encoding && encoding != PG_UTF8)
1606
    || standalone != -1)
1607
  {
1608
    appendStringInfoString(buf, "<?xml");
1609
1610
    if (version)
1611
      appendStringInfo(buf, " version=\"%s\"", version);
1612
    else
1613
      appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION);
1614
1615
    if (encoding && encoding != PG_UTF8)
1616
    {
1617
      /*
1618
       * XXX might be useful to convert this to IANA names (ISO-8859-1
1619
       * instead of LATIN1 etc.); needs field experience
1620
       */
1621
      appendStringInfo(buf, " encoding=\"%s\"",
1622
               pg_encoding_to_char(encoding));
1623
    }
1624
1625
    if (standalone == 1)
1626
      appendStringInfoString(buf, " standalone=\"yes\"");
1627
    else if (standalone == 0)
1628
      appendStringInfoString(buf, " standalone=\"no\"");
1629
    appendStringInfoString(buf, "?>");
1630
1631
    return true;
1632
  }
1633
  else
1634
    return false;
1635
}
1636
1637
/*
1638
 * Test whether an input that is to be parsed as CONTENT contains a DTD.
1639
 *
1640
 * The SQL/XML:2003 definition of CONTENT ("XMLDecl? content") is not
1641
 * satisfied by a document with a DTD, which is a bit of a wart, as it means
1642
 * the CONTENT type is not a proper superset of DOCUMENT.  SQL/XML:2006 and
1643
 * later fix that, by redefining content with reference to the "more
1644
 * permissive" Document Node of the XQuery/XPath Data Model, such that any
1645
 * DOCUMENT value is indeed also a CONTENT value.  That definition is more
1646
 * useful, as CONTENT becomes usable for parsing input of unknown form (think
1647
 * pg_restore).
1648
 *
1649
 * As used below in parse_xml when parsing for CONTENT, libxml does not give
1650
 * us the 2006+ behavior, but only the 2003; it will choke if the input has
1651
 * a DTD.  But we can provide the 2006+ definition of CONTENT easily enough,
1652
 * by detecting this case first and simply doing the parse as DOCUMENT.
1653
 *
1654
 * A DTD can be found arbitrarily far in, but that would be a contrived case;
1655
 * it will ordinarily start within a few dozen characters.  The only things
1656
 * that can precede it are an XMLDecl (here, the caller will have called
1657
 * parse_xml_decl already), whitespace, comments, and processing instructions.
1658
 * This function need only return true if it sees a valid sequence of such
1659
 * things leading to <!DOCTYPE.  It can simply return false in any other
1660
 * cases, including malformed input; that will mean the input gets parsed as
1661
 * CONTENT as originally planned, with libxml reporting any errors.
1662
 *
1663
 * This is only to be called from xml_parse, when pg_xml_init has already
1664
 * been called.  The input is already in UTF8 encoding.
1665
 */
1666
static bool
1667
xml_doctype_in_content(const xmlChar *str)
1668
{
1669
  const xmlChar *p = str;
1670
1671
  for (;;)
1672
  {
1673
    const xmlChar *e;
1674
1675
    SKIP_XML_SPACE(p);
1676
    if (*p != '<')
1677
      return false;
1678
    p++;
1679
1680
    if (*p == '!')
1681
    {
1682
      p++;
1683
1684
      /* if we see <!DOCTYPE, we can return true */
1685
      if (xmlStrncmp(p, (xmlChar *) "DOCTYPE", 7) == 0)
1686
        return true;
1687
1688
      /* otherwise, if it's not a comment, fail */
1689
      if (xmlStrncmp(p, (xmlChar *) "--", 2) != 0)
1690
        return false;
1691
      /* find end of comment: find -- and a > must follow */
1692
      p = xmlStrstr(p + 2, (xmlChar *) "--");
1693
      if (!p || p[2] != '>')
1694
        return false;
1695
      /* advance over comment, and keep scanning */
1696
      p += 3;
1697
      continue;
1698
    }
1699
1700
    /* otherwise, if it's not a PI <?target something?>, fail */
1701
    if (*p != '?')
1702
      return false;
1703
    p++;
1704
1705
    /* find end of PI (the string ?> is forbidden within a PI) */
1706
    e = xmlStrstr(p, (xmlChar *) "?>");
1707
    if (!e)
1708
      return false;
1709
1710
    /* advance over PI, keep scanning */
1711
    p = e + 2;
1712
  }
1713
}
1714
1715
1716
/*
1717
 * Convert a text object to XML internal representation
1718
 *
1719
 * data is the source data (must not be toasted!), encoding is its encoding,
1720
 * and xmloption_arg and preserve_whitespace are options for the
1721
 * transformation.
1722
 *
1723
 * If parsed_xmloptiontype isn't NULL, *parsed_xmloptiontype is set to the
1724
 * XmlOptionType actually used to parse the input (typically the same as
1725
 * xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode).
1726
 *
1727
 * If parsed_nodes isn't NULL and we parse in CONTENT mode, the list
1728
 * of parsed nodes from the xmlParseInNodeContext call will be returned
1729
 * to *parsed_nodes.  (It is caller's responsibility to free that.)
1730
 *
1731
 * Errors normally result in ereport(ERROR), but if escontext is an
1732
 * ErrorSaveContext, then "safe" errors are reported there instead, and the
1733
 * caller must check SOFT_ERROR_OCCURRED() to see whether that happened.
1734
 *
1735
 * Note: it is caller's responsibility to xmlFreeDoc() the result,
1736
 * else a permanent memory leak will ensue!  But note the result could
1737
 * be NULL after a soft error.
1738
 *
1739
 * TODO maybe libxml2's xmlreader is better? (do not construct DOM,
1740
 * yet do not use SAX - see xmlreader.c)
1741
 */
1742
static xmlDocPtr
1743
xml_parse(text *data, XmlOptionType xmloption_arg,
1744
      bool preserve_whitespace, int encoding,
1745
      XmlOptionType *parsed_xmloptiontype, xmlNodePtr *parsed_nodes,
1746
      Node *escontext)
1747
{
1748
  int32   len;
1749
  xmlChar    *string;
1750
  xmlChar    *utf8string;
1751
  PgXmlErrorContext *xmlerrcxt;
1752
  volatile xmlParserCtxtPtr ctxt = NULL;
1753
  volatile xmlDocPtr doc = NULL;
1754
1755
  /*
1756
   * This step looks annoyingly redundant, but we must do it to have a
1757
   * null-terminated string in case encoding conversion isn't required.
1758
   */
1759
  len = VARSIZE_ANY_EXHDR(data);  /* will be useful later */
1760
  string = xml_text2xmlChar(data);
1761
1762
  /*
1763
   * If the data isn't UTF8, we must translate before giving it to libxml.
1764
   *
1765
   * XXX ideally, we'd catch any encoding conversion failure and return a
1766
   * soft error.  However, failure to convert to UTF8 should be pretty darn
1767
   * rare, so for now this is left undone.
1768
   */
1769
  utf8string = pg_do_encoding_conversion(string,
1770
                       len,
1771
                       encoding,
1772
                       PG_UTF8);
1773
1774
  /* Start up libxml and its parser */
1775
  xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED);
1776
1777
  /* Use a TRY block to ensure we clean up correctly */
1778
  PG_TRY();
1779
  {
1780
    bool    parse_as_document = false;
1781
    int     options;
1782
    int     res_code;
1783
    size_t    count = 0;
1784
    xmlChar    *version = NULL;
1785
    int     standalone = 0;
1786
1787
    /* Any errors here are reported as hard ereport's */
1788
    xmlInitParser();
1789
1790
    /* Decide whether to parse as document or content */
1791
    if (xmloption_arg == XMLOPTION_DOCUMENT)
1792
      parse_as_document = true;
1793
    else
1794
    {
1795
      /* Parse and skip over the XML declaration, if any */
1796
      res_code = parse_xml_decl(utf8string,
1797
                    &count, &version, NULL, &standalone);
1798
      if (res_code != 0)
1799
      {
1800
        errsave(escontext,
1801
            errcode(ERRCODE_INVALID_XML_CONTENT),
1802
            errmsg_internal("invalid XML content: invalid XML declaration"),
1803
            errdetail_for_xml_code(res_code));
1804
        goto fail;
1805
      }
1806
1807
      /* Is there a DOCTYPE element? */
1808
      if (xml_doctype_in_content(utf8string + count))
1809
        parse_as_document = true;
1810
    }
1811
1812
    /*
1813
     * Select parse options.
1814
     *
1815
     * Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
1816
     * according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined by
1817
     * internal DTD are applied'.  As for external DTDs, we try to support
1818
     * them too (see SQL/XML:2008 GR 10.16.7.e), but that doesn't really
1819
     * happen because xmlPgEntityLoader prevents it.
1820
     */
1821
    options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
1822
      | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
1823
1824
    /* initialize output parameters */
1825
    if (parsed_xmloptiontype != NULL)
1826
      *parsed_xmloptiontype = parse_as_document ? XMLOPTION_DOCUMENT :
1827
        XMLOPTION_CONTENT;
1828
    if (parsed_nodes != NULL)
1829
      *parsed_nodes = NULL;
1830
1831
    if (parse_as_document)
1832
    {
1833
      ctxt = xmlNewParserCtxt();
1834
      if (ctxt == NULL || xmlerrcxt->err_occurred)
1835
        xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1836
              "could not allocate parser context");
1837
1838
      doc = xmlCtxtReadDoc(ctxt, utf8string,
1839
                 NULL,  /* no URL */
1840
                 "UTF-8",
1841
                 options);
1842
1843
      if (doc == NULL || xmlerrcxt->err_occurred)
1844
      {
1845
        /* Use original option to decide which error code to report */
1846
        if (xmloption_arg == XMLOPTION_DOCUMENT)
1847
          xml_errsave(escontext, xmlerrcxt,
1848
                ERRCODE_INVALID_XML_DOCUMENT,
1849
                "invalid XML document");
1850
        else
1851
          xml_errsave(escontext, xmlerrcxt,
1852
                ERRCODE_INVALID_XML_CONTENT,
1853
                "invalid XML content");
1854
        goto fail;
1855
      }
1856
    }
1857
    else
1858
    {
1859
      xmlNodePtr  root;
1860
      xmlNodePtr  oldroot PG_USED_FOR_ASSERTS_ONLY;
1861
1862
      /* set up document with empty root node to be the context node */
1863
      doc = xmlNewDoc(version);
1864
      if (doc == NULL || xmlerrcxt->err_occurred)
1865
        xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1866
              "could not allocate XML document");
1867
1868
      Assert(doc->encoding == NULL);
1869
      doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
1870
      if (doc->encoding == NULL || xmlerrcxt->err_occurred)
1871
        xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1872
              "could not allocate XML document");
1873
      doc->standalone = standalone;
1874
1875
      root = xmlNewNode(NULL, (const xmlChar *) "content-root");
1876
      if (root == NULL || xmlerrcxt->err_occurred)
1877
        xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1878
              "could not allocate xml node");
1879
1880
      /*
1881
       * This attaches root to doc, so we need not free it separately;
1882
       * and there can't yet be any old root to free.
1883
       */
1884
      oldroot = xmlDocSetRootElement(doc, root);
1885
      Assert(oldroot == NULL);
1886
1887
      /* allow empty content */
1888
      if (*(utf8string + count))
1889
      {
1890
        xmlNodePtr  node_list = NULL;
1891
        xmlParserErrors res;
1892
1893
        res = xmlParseInNodeContext(root,
1894
                      (char *) utf8string + count,
1895
                      strlen((char *) utf8string + count),
1896
                      options,
1897
                      &node_list);
1898
1899
        if (res != XML_ERR_OK || xmlerrcxt->err_occurred)
1900
        {
1901
          xmlFreeNodeList(node_list);
1902
          xml_errsave(escontext, xmlerrcxt,
1903
                ERRCODE_INVALID_XML_CONTENT,
1904
                "invalid XML content");
1905
          goto fail;
1906
        }
1907
1908
        if (parsed_nodes != NULL)
1909
          *parsed_nodes = node_list;
1910
        else
1911
          xmlFreeNodeList(node_list);
1912
      }
1913
    }
1914
1915
fail:
1916
    ;
1917
  }
1918
  PG_CATCH();
1919
  {
1920
    if (doc != NULL)
1921
      xmlFreeDoc(doc);
1922
    if (ctxt != NULL)
1923
      xmlFreeParserCtxt(ctxt);
1924
1925
    pg_xml_done(xmlerrcxt, true);
1926
1927
    PG_RE_THROW();
1928
  }
1929
  PG_END_TRY();
1930
1931
  if (ctxt != NULL)
1932
    xmlFreeParserCtxt(ctxt);
1933
1934
  pg_xml_done(xmlerrcxt, false);
1935
1936
  return doc;
1937
}
1938
1939
1940
/*
1941
 * xmlChar<->text conversions
1942
 */
1943
static xmlChar *
1944
xml_text2xmlChar(text *in)
1945
{
1946
  return (xmlChar *) text_to_cstring(in);
1947
}
1948
1949
1950
#ifdef USE_LIBXMLCONTEXT
1951
1952
/*
1953
 * Manage the special context used for all libxml allocations (but only
1954
 * in special debug builds; see notes at top of file)
1955
 */
1956
static void
1957
xml_memory_init(void)
1958
{
1959
  /* Create memory context if not there already */
1960
  if (LibxmlContext == NULL)
1961
    LibxmlContext = AllocSetContextCreate(TopMemoryContext,
1962
                        "Libxml context",
1963
                        ALLOCSET_DEFAULT_SIZES);
1964
1965
  /* Re-establish the callbacks even if already set */
1966
  xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
1967
}
1968
1969
/*
1970
 * Wrappers for memory management functions
1971
 */
1972
static void *
1973
xml_palloc(size_t size)
1974
{
1975
  return MemoryContextAlloc(LibxmlContext, size);
1976
}
1977
1978
1979
static void *
1980
xml_repalloc(void *ptr, size_t size)
1981
{
1982
  return repalloc(ptr, size);
1983
}
1984
1985
1986
static void
xml_pfree(void *ptr)
{
  /*
   * Some parts of libxml call xmlFree(NULL) and expect that to be allowed,
   * so a null pointer is quietly ignored instead of being given to pfree().
   */
  if (ptr == NULL)
    return;

  pfree(ptr);
}
1993
1994
1995
static char *
1996
xml_pstrdup(const char *string)
1997
{
1998
  return MemoryContextStrdup(LibxmlContext, string);
1999
}
2000
#endif              /* USE_LIBXMLCONTEXT */
2001
2002
2003
/*
2004
 * xmlPgEntityLoader --- entity loader callback function
2005
 *
2006
 * Silently prevent any external entity URL from being loaded.  We don't want
2007
 * to throw an error, so instead make the entity appear to expand to an empty
2008
 * string.
2009
 *
2010
 * We would prefer to allow loading entities that exist in the system's
2011
 * global XML catalog; but the available libxml2 APIs make that a complex
2012
 * and fragile task.  For now, just shut down all external access.
2013
 */
2014
static xmlParserInputPtr
2015
xmlPgEntityLoader(const char *URL, const char *ID,
2016
          xmlParserCtxtPtr ctxt)
2017
{
2018
  return xmlNewStringInputStream(ctxt, (const xmlChar *) "");
2019
}
2020
2021
2022
/*
2023
 * xml_ereport --- report an XML-related error
2024
 *
2025
 * The "msg" is the SQL-level message; some can be adopted from the SQL/XML
2026
 * standard.  This function adds libxml's native error message, if any, as
2027
 * detail.
2028
 *
2029
 * This is exported for modules that want to share the core libxml error
2030
 * handler.  Note that pg_xml_init() *must* have been called previously.
2031
 */
2032
void
2033
xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
2034
{
2035
  char     *detail;
2036
2037
  /* Defend against someone passing us a bogus context struct */
2038
  if (errcxt->magic != ERRCXT_MAGIC)
2039
    elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext");
2040
2041
  /* Flag that the current libxml error has been reported */
2042
  errcxt->err_occurred = false;
2043
2044
  /* Include detail only if we have some text from libxml */
2045
  if (errcxt->err_buf.len > 0)
2046
    detail = errcxt->err_buf.data;
2047
  else
2048
    detail = NULL;
2049
2050
  ereport(level,
2051
      (errcode(sqlcode),
2052
       errmsg_internal("%s", msg),
2053
       detail ? errdetail_internal("%s", detail) : 0));
2054
}
2055
2056
2057
/*
2058
 * xml_errsave --- save an XML-related error
2059
 *
2060
 * If escontext is an ErrorSaveContext, error details are saved into it,
2061
 * and control returns normally.
2062
 *
2063
 * Otherwise, the error is thrown, so that this is equivalent to
2064
 * xml_ereport() with level == ERROR.
2065
 *
2066
 * This should be used only for errors that we're sure we do not need
2067
 * a transaction abort to clean up after.
2068
 */
2069
static void
2070
xml_errsave(Node *escontext, PgXmlErrorContext *errcxt,
2071
      int sqlcode, const char *msg)
2072
{
2073
  char     *detail;
2074
2075
  /* Defend against someone passing us a bogus context struct */
2076
  if (errcxt->magic != ERRCXT_MAGIC)
2077
    elog(ERROR, "xml_errsave called with invalid PgXmlErrorContext");
2078
2079
  /* Flag that the current libxml error has been reported */
2080
  errcxt->err_occurred = false;
2081
2082
  /* Include detail only if we have some text from libxml */
2083
  if (errcxt->err_buf.len > 0)
2084
    detail = errcxt->err_buf.data;
2085
  else
2086
    detail = NULL;
2087
2088
  errsave(escontext,
2089
      (errcode(sqlcode),
2090
       errmsg_internal("%s", msg),
2091
       detail ? errdetail_internal("%s", detail) : 0));
2092
}
2093
2094
2095
/*
2096
 * Error handler for libxml errors and warnings
2097
 */
2098
static void
2099
xml_errorHandler(void *data, PgXmlErrorPtr error)
2100
{
2101
  PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data;
2102
  xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt;
2103
  xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL;
2104
  xmlNodePtr  node = error->node;
2105
  const xmlChar *name = (node != NULL &&
2106
               node->type == XML_ELEMENT_NODE) ? node->name : NULL;
2107
  int     domain = error->domain;
2108
  int     level = error->level;
2109
  StringInfo  errorBuf;
2110
2111
  /*
2112
   * Defend against someone passing us a bogus context struct.
2113
   *
2114
   * We force a backend exit if this check fails because longjmp'ing out of
2115
   * libxml would likely render it unsafe to use further.
2116
   */
2117
  if (xmlerrcxt->magic != ERRCXT_MAGIC)
2118
    elog(FATAL, "xml_errorHandler called with invalid PgXmlErrorContext");
2119
2120
  /*----------
2121
   * Older libxml versions report some errors differently.
2122
   * First, some errors were previously reported as coming from the parser
2123
   * domain but are now reported as coming from the namespace domain.
2124
   * Second, some warnings were upgraded to errors.
2125
   * We attempt to compensate for that here.
2126
   *----------
2127
   */
2128
  switch (error->code)
2129
  {
2130
    case XML_WAR_NS_URI:
2131
      level = XML_ERR_ERROR;
2132
      domain = XML_FROM_NAMESPACE;
2133
      break;
2134
2135
    case XML_ERR_NS_DECL_ERROR:
2136
    case XML_WAR_NS_URI_RELATIVE:
2137
    case XML_WAR_NS_COLUMN:
2138
    case XML_NS_ERR_XML_NAMESPACE:
2139
    case XML_NS_ERR_UNDEFINED_NAMESPACE:
2140
    case XML_NS_ERR_QNAME:
2141
    case XML_NS_ERR_ATTRIBUTE_REDEFINED:
2142
    case XML_NS_ERR_EMPTY:
2143
      domain = XML_FROM_NAMESPACE;
2144
      break;
2145
  }
2146
2147
  /* Decide whether to act on the error or not */
2148
  switch (domain)
2149
  {
2150
    case XML_FROM_PARSER:
2151
2152
      /*
2153
       * XML_ERR_NOT_WELL_BALANCED is typically reported after some
2154
       * other, more on-point error.  Furthermore, libxml2 2.13 reports
2155
       * it under a completely different set of rules than prior
2156
       * versions.  To avoid cross-version behavioral differences,
2157
       * suppress it so long as we already logged some error.
2158
       */
2159
      if (error->code == XML_ERR_NOT_WELL_BALANCED &&
2160
        xmlerrcxt->err_occurred)
2161
        return;
2162
      /* fall through */
2163
2164
    case XML_FROM_NONE:
2165
    case XML_FROM_MEMORY:
2166
    case XML_FROM_IO:
2167
2168
      /*
2169
       * Suppress warnings about undeclared entities.  We need to do
2170
       * this to avoid problems due to not loading DTD definitions.
2171
       */
2172
      if (error->code == XML_WAR_UNDECLARED_ENTITY)
2173
        return;
2174
2175
      /* Otherwise, accept error regardless of the parsing purpose */
2176
      break;
2177
2178
    default:
2179
      /* Ignore error if only doing well-formedness check */
2180
      if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED)
2181
        return;
2182
      break;
2183
  }
2184
2185
  /* Prepare error message in errorBuf */
2186
  errorBuf = makeStringInfo();
2187
2188
  if (error->line > 0)
2189
    appendStringInfo(errorBuf, "line %d: ", error->line);
2190
  if (name != NULL)
2191
    appendStringInfo(errorBuf, "element %s: ", name);
2192
  if (error->message != NULL)
2193
    appendStringInfoString(errorBuf, error->message);
2194
  else
2195
    appendStringInfoString(errorBuf, "(no message provided)");
2196
2197
  /*
2198
   * Append context information to errorBuf.
2199
   *
2200
   * xmlParserPrintFileContext() uses libxml's "generic" error handler to
2201
   * write the context.  Since we don't want to duplicate libxml
2202
   * functionality here, we set up a generic error handler temporarily.
2203
   *
2204
   * We use appendStringInfo() directly as libxml's generic error handler.
2205
   * This should work because it has essentially the same signature as
2206
   * libxml expects, namely (void *ptr, const char *msg, ...).
2207
   */
2208
  if (input != NULL)
2209
  {
2210
    xmlGenericErrorFunc errFuncSaved = xmlGenericError;
2211
    void     *errCtxSaved = xmlGenericErrorContext;
2212
2213
    xmlSetGenericErrorFunc(errorBuf,
2214
                 (xmlGenericErrorFunc) appendStringInfo);
2215
2216
    /* Add context information to errorBuf */
2217
    appendStringInfoLineSeparator(errorBuf);
2218
2219
    xmlParserPrintFileContext(input);
2220
2221
    /* Restore generic error func */
2222
    xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved);
2223
  }
2224
2225
  /* Get rid of any trailing newlines in errorBuf */
2226
  chopStringInfoNewlines(errorBuf);
2227
2228
  /*
2229
   * Legacy error handling mode.  err_occurred is never set, we just add the
2230
   * message to err_buf.  This mode exists because the xml2 contrib module
2231
   * uses our error-handling infrastructure, but we don't want to change its
2232
   * behaviour since it's deprecated anyway.  This is also why we don't
2233
   * distinguish between notices, warnings and errors here --- the old-style
2234
   * generic error handler wouldn't have done that either.
2235
   */
2236
  if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY)
2237
  {
2238
    appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
2239
    appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
2240
                 errorBuf->len);
2241
2242
    destroyStringInfo(errorBuf);
2243
    return;
2244
  }
2245
2246
  /*
2247
   * We don't want to ereport() here because that'd probably leave libxml in
2248
   * an inconsistent state.  Instead, we remember the error and ereport()
2249
   * from xml_ereport().
2250
   *
2251
   * Warnings and notices can be reported immediately since they won't cause
2252
   * a longjmp() out of libxml.
2253
   */
2254
  if (level >= XML_ERR_ERROR)
2255
  {
2256
    appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
2257
    appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
2258
                 errorBuf->len);
2259
2260
    xmlerrcxt->err_occurred = true;
2261
  }
2262
  else if (level >= XML_ERR_WARNING)
2263
  {
2264
    ereport(WARNING,
2265
        (errmsg_internal("%s", errorBuf->data)));
2266
  }
2267
  else
2268
  {
2269
    ereport(NOTICE,
2270
        (errmsg_internal("%s", errorBuf->data)));
2271
  }
2272
2273
  destroyStringInfo(errorBuf);
2274
}
2275
2276
2277
/*
2278
 * Convert libxml error codes into textual errdetail messages.
2279
 *
2280
 * This should be called within an ereport or errsave invocation,
2281
 * just as errdetail would be.
2282
 *
2283
 * At the moment, we only need to cover those codes that we
2284
 * may raise in this file.
2285
 */
2286
static int
2287
errdetail_for_xml_code(int code)
2288
{
2289
  const char *det;
2290
2291
  switch (code)
2292
  {
2293
    case XML_ERR_INVALID_CHAR:
2294
      det = gettext_noop("Invalid character value.");
2295
      break;
2296
    case XML_ERR_SPACE_REQUIRED:
2297
      det = gettext_noop("Space required.");
2298
      break;
2299
    case XML_ERR_STANDALONE_VALUE:
2300
      det = gettext_noop("standalone accepts only 'yes' or 'no'.");
2301
      break;
2302
    case XML_ERR_VERSION_MISSING:
2303
      det = gettext_noop("Malformed declaration: missing version.");
2304
      break;
2305
    case XML_ERR_MISSING_ENCODING:
2306
      det = gettext_noop("Missing encoding in text declaration.");
2307
      break;
2308
    case XML_ERR_XMLDECL_NOT_FINISHED:
2309
      det = gettext_noop("Parsing XML declaration: '?>' expected.");
2310
      break;
2311
    default:
2312
      det = gettext_noop("Unrecognized libxml error code: %d.");
2313
      break;
2314
  }
2315
2316
  return errdetail(det, code);
2317
}
2318
2319
2320
/*
2321
 * Remove all trailing newlines from a StringInfo string
2322
 */
2323
static void
2324
chopStringInfoNewlines(StringInfo str)
2325
{
2326
  while (str->len > 0 && str->data[str->len - 1] == '\n')
2327
    str->data[--str->len] = '\0';
2328
}
2329
2330
2331
/*
2332
 * Append a newline after removing any existing trailing newlines
2333
 */
2334
static void
2335
appendStringInfoLineSeparator(StringInfo str)
2336
{
2337
  chopStringInfoNewlines(str);
2338
  if (str->len > 0)
2339
    appendStringInfoChar(str, '\n');
2340
}
2341
2342
2343
/*
2344
 * Convert one char in the current server encoding to a Unicode codepoint.
2345
 */
2346
static pg_wchar
2347
sqlchar_to_unicode(const char *s)
2348
{
2349
  char     *utf8string;
2350
  pg_wchar  ret[2];     /* need space for trailing zero */
2351
2352
  /* note we're not assuming s is null-terminated */
2353
  utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
2354
2355
  pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
2356
                  pg_encoding_mblen(PG_UTF8, utf8string));
2357
2358
  if (utf8string != s)
2359
    pfree(utf8string);
2360
2361
  return ret[0];
2362
}
2363
2364
2365
static bool
2366
is_valid_xml_namefirst(pg_wchar c)
2367
{
2368
  /* (Letter | '_' | ':') */
2369
  return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2370
      || c == '_' || c == ':');
2371
}
2372
2373
2374
static bool
2375
is_valid_xml_namechar(pg_wchar c)
2376
{
2377
  /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
2378
  return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2379
      || xmlIsDigitQ(c)
2380
      || c == '.' || c == '-' || c == '_' || c == ':'
2381
      || xmlIsCombiningQ(c)
2382
      || xmlIsExtenderQ(c));
2383
}
2384
#endif              /* USE_LIBXML */
2385
2386
2387
/*
 * Map SQL identifier to XML name; see SQL/XML:2008 section 9.1.
 *
 * Characters that cannot appear at their position in an XML name are
 * written as _xHHHH_ escapes.  In addition: a ':' is escaped when it is
 * the first character or fully_escaped is set; a '_' followed by 'x' is
 * escaped; with fully_escaped, the first character of a leading "xml"
 * (any case) is escaped; with escape_period, '.' is escaped.
 *
 * Without libxml support, NO_XML_SUPPORT() is invoked instead.
 */
char *
map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
                               bool escape_period)
{
#ifdef USE_LIBXML
  StringInfoData out;
  const char *cur;

  /*
   * SQL/XML doesn't make use of this case anywhere, so it's probably a
   * mistake.
   */
  Assert(fully_escaped || !escape_period);

  initStringInfo(&out);

  for (cur = ident; *cur != '\0'; cur += pg_mblen(cur))
  {
    bool      at_start = (cur == ident);
    pg_wchar  u;
    bool      valid;

    if (*cur == ':' && (at_start || fully_escaped))
    {
      appendStringInfoString(&out, "_x003A_");
      continue;
    }

    if (*cur == '_' && cur[1] == 'x')
    {
      appendStringInfoString(&out, "_x005F_");
      continue;
    }

    if (fully_escaped && at_start &&
        pg_strncasecmp(cur, "xml", 3) == 0)
    {
      /* Escape the leading x or X, keeping its case */
      appendStringInfoString(&out,
                             (*cur == 'x') ? "_x0078_" : "_x0058_");
      continue;
    }

    if (escape_period && *cur == '.')
    {
      appendStringInfoString(&out, "_x002E_");
      continue;
    }

    /* Ordinary character: copy it if valid here, else escape it */
    u = sqlchar_to_unicode(cur);
    valid = at_start
      ? is_valid_xml_namefirst(u)
      : is_valid_xml_namechar(u);

    if (valid)
      appendBinaryStringInfo(&out, cur, pg_mblen(cur));
    else
      appendStringInfo(&out, "_x%04X_", (unsigned int) u);
  }

  return out.data;
#else             /* not USE_LIBXML */
  NO_XML_SUPPORT();
  return NULL;
#endif              /* not USE_LIBXML */
}
2441
2442
2443
/*
2444
 * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3.
2445
 */
2446
char *
2447
map_xml_name_to_sql_identifier(const char *name)
2448
0
{
2449
0
  StringInfoData buf;
2450
0
  const char *p;
2451
2452
0
  initStringInfo(&buf);
2453
2454
0
  for (p = name; *p; p += pg_mblen(p))
2455
0
  {
2456
0
    if (*p == '_' && *(p + 1) == 'x'
2457
0
      && isxdigit((unsigned char) *(p + 2))
2458
0
      && isxdigit((unsigned char) *(p + 3))
2459
0
      && isxdigit((unsigned char) *(p + 4))
2460
0
      && isxdigit((unsigned char) *(p + 5))
2461
0
      && *(p + 6) == '_')
2462
0
    {
2463
0
      char    cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
2464
0
      unsigned int u;
2465
2466
0
      sscanf(p + 2, "%X", &u);
2467
0
      pg_unicode_to_server(u, (unsigned char *) cbuf);
2468
0
      appendStringInfoString(&buf, cbuf);
2469
0
      p += 6;
2470
0
    }
2471
0
    else
2472
0
      appendBinaryStringInfo(&buf, p, pg_mblen(p));
2473
0
  }
2474
2475
0
  return buf.data;
2476
0
}
2477
2478
/*
2479
 * Map SQL value to XML value; see SQL/XML:2008 section 9.8.
2480
 *
2481
 * When xml_escape_strings is true, then certain characters in string
2482
 * values are replaced by entity references (&lt; etc.), as specified
2483
 * in SQL/XML:2008 section 9.8 GR 9) a) iii).   This is normally what is
2484
 * wanted.  The false case is mainly useful when the resulting value
2485
 * is used with xmlTextWriterWriteAttribute() to write out an
2486
 * attribute, because that function does the escaping itself.
2487
 */
2488
char *
2489
map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings)
2490
0
{
2491
0
  if (type_is_array_domain(type))
2492
0
  {
2493
0
    ArrayType  *array;
2494
0
    Oid     elmtype;
2495
0
    int16   elmlen;
2496
0
    bool    elmbyval;
2497
0
    char    elmalign;
2498
0
    int     num_elems;
2499
0
    Datum    *elem_values;
2500
0
    bool     *elem_nulls;
2501
0
    StringInfoData buf;
2502
0
    int     i;
2503
2504
0
    array = DatumGetArrayTypeP(value);
2505
0
    elmtype = ARR_ELEMTYPE(array);
2506
0
    get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);
2507
2508
0
    deconstruct_array(array, elmtype,
2509
0
              elmlen, elmbyval, elmalign,
2510
0
              &elem_values, &elem_nulls,
2511
0
              &num_elems);
2512
2513
0
    initStringInfo(&buf);
2514
2515
0
    for (i = 0; i < num_elems; i++)
2516
0
    {
2517
0
      if (elem_nulls[i])
2518
0
        continue;
2519
0
      appendStringInfoString(&buf, "<element>");
2520
0
      appendStringInfoString(&buf,
2521
0
                   map_sql_value_to_xml_value(elem_values[i],
2522
0
                                elmtype, true));
2523
0
      appendStringInfoString(&buf, "</element>");
2524
0
    }
2525
2526
0
    pfree(elem_values);
2527
0
    pfree(elem_nulls);
2528
2529
0
    return buf.data;
2530
0
  }
2531
0
  else
2532
0
  {
2533
0
    Oid     typeOut;
2534
0
    bool    isvarlena;
2535
0
    char     *str;
2536
2537
    /*
2538
     * Flatten domains; the special-case treatments below should apply to,
2539
     * eg, domains over boolean not just boolean.
2540
     */
2541
0
    type = getBaseType(type);
2542
2543
    /*
2544
     * Special XSD formatting for some data types
2545
     */
2546
0
    switch (type)
2547
0
    {
2548
0
      case BOOLOID:
2549
0
        if (DatumGetBool(value))
2550
0
          return "true";
2551
0
        else
2552
0
          return "false";
2553
2554
0
      case DATEOID:
2555
0
        {
2556
0
          DateADT   date;
2557
0
          struct pg_tm tm;
2558
0
          char    buf[MAXDATELEN + 1];
2559
2560
0
          date = DatumGetDateADT(value);
2561
          /* XSD doesn't support infinite values */
2562
0
          if (DATE_NOT_FINITE(date))
2563
0
            ereport(ERROR,
2564
0
                (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2565
0
                 errmsg("date out of range"),
2566
0
                 errdetail("XML does not support infinite date values.")));
2567
0
          j2date(date + POSTGRES_EPOCH_JDATE,
2568
0
               &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
2569
0
          EncodeDateOnly(&tm, USE_XSD_DATES, buf);
2570
2571
0
          return pstrdup(buf);
2572
0
        }
2573
2574
0
      case TIMESTAMPOID:
2575
0
        {
2576
0
          Timestamp timestamp;
2577
0
          struct pg_tm tm;
2578
0
          fsec_t    fsec;
2579
0
          char    buf[MAXDATELEN + 1];
2580
2581
0
          timestamp = DatumGetTimestamp(value);
2582
2583
          /* XSD doesn't support infinite values */
2584
0
          if (TIMESTAMP_NOT_FINITE(timestamp))
2585
0
            ereport(ERROR,
2586
0
                (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2587
0
                 errmsg("timestamp out of range"),
2588
0
                 errdetail("XML does not support infinite timestamp values.")));
2589
0
          else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
2590
0
            EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
2591
0
          else
2592
0
            ereport(ERROR,
2593
0
                (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2594
0
                 errmsg("timestamp out of range")));
2595
2596
0
          return pstrdup(buf);
2597
0
        }
2598
2599
0
      case TIMESTAMPTZOID:
2600
0
        {
2601
0
          TimestampTz timestamp;
2602
0
          struct pg_tm tm;
2603
0
          int     tz;
2604
0
          fsec_t    fsec;
2605
0
          const char *tzn = NULL;
2606
0
          char    buf[MAXDATELEN + 1];
2607
2608
0
          timestamp = DatumGetTimestamp(value);
2609
2610
          /* XSD doesn't support infinite values */
2611
0
          if (TIMESTAMP_NOT_FINITE(timestamp))
2612
0
            ereport(ERROR,
2613
0
                (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2614
0
                 errmsg("timestamp out of range"),
2615
0
                 errdetail("XML does not support infinite timestamp values.")));
2616
0
          else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0)
2617
0
            EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
2618
0
          else
2619
0
            ereport(ERROR,
2620
0
                (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2621
0
                 errmsg("timestamp out of range")));
2622
2623
0
          return pstrdup(buf);
2624
0
        }
2625
2626
#ifdef USE_LIBXML
2627
      case BYTEAOID:
2628
        {
2629
          bytea    *bstr = DatumGetByteaPP(value);
2630
          PgXmlErrorContext *xmlerrcxt;
2631
          volatile xmlBufferPtr buf = NULL;
2632
          volatile xmlTextWriterPtr writer = NULL;
2633
          char     *result;
2634
2635
          xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
2636
2637
          PG_TRY();
2638
          {
2639
            buf = xmlBufferCreate();
2640
            if (buf == NULL || xmlerrcxt->err_occurred)
2641
              xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2642
                    "could not allocate xmlBuffer");
2643
            writer = xmlNewTextWriterMemory(buf, 0);
2644
            if (writer == NULL || xmlerrcxt->err_occurred)
2645
              xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2646
                    "could not allocate xmlTextWriter");
2647
2648
            if (xmlbinary == XMLBINARY_BASE64)
2649
              xmlTextWriterWriteBase64(writer, VARDATA_ANY(bstr),
2650
                           0, VARSIZE_ANY_EXHDR(bstr));
2651
            else
2652
              xmlTextWriterWriteBinHex(writer, VARDATA_ANY(bstr),
2653
                           0, VARSIZE_ANY_EXHDR(bstr));
2654
2655
            /* we MUST do this now to flush data out to the buffer */
2656
            xmlFreeTextWriter(writer);
2657
            writer = NULL;
2658
2659
            result = pstrdup((const char *) xmlBufferContent(buf));
2660
          }
2661
          PG_CATCH();
2662
          {
2663
            if (writer)
2664
              xmlFreeTextWriter(writer);
2665
            if (buf)
2666
              xmlBufferFree(buf);
2667
2668
            pg_xml_done(xmlerrcxt, true);
2669
2670
            PG_RE_THROW();
2671
          }
2672
          PG_END_TRY();
2673
2674
          xmlBufferFree(buf);
2675
2676
          pg_xml_done(xmlerrcxt, false);
2677
2678
          return result;
2679
        }
2680
#endif              /* USE_LIBXML */
2681
2682
0
    }
2683
2684
    /*
2685
     * otherwise, just use the type's native text representation
2686
     */
2687
0
    getTypeOutputInfo(type, &typeOut, &isvarlena);
2688
0
    str = OidOutputFunctionCall(typeOut, value);
2689
2690
    /* ... exactly as-is for XML, and when escaping is not wanted */
2691
0
    if (type == XMLOID || !xml_escape_strings)
2692
0
      return str;
2693
2694
    /* otherwise, translate special characters as needed */
2695
0
    return escape_xml(str);
2696
0
  }
2697
0
}
2698
2699
2700
/*
2701
 * Escape characters in text that have special meanings in XML.
2702
 *
2703
 * Returns a palloc'd string.
2704
 *
2705
 * NB: this is intentionally not dependent on libxml.
2706
 */
2707
char *
2708
escape_xml(const char *str)
2709
0
{
2710
0
  StringInfoData buf;
2711
0
  const char *p;
2712
2713
0
  initStringInfo(&buf);
2714
0
  for (p = str; *p; p++)
2715
0
  {
2716
0
    switch (*p)
2717
0
    {
2718
0
      case '&':
2719
0
        appendStringInfoString(&buf, "&amp;");
2720
0
        break;
2721
0
      case '<':
2722
0
        appendStringInfoString(&buf, "&lt;");
2723
0
        break;
2724
0
      case '>':
2725
0
        appendStringInfoString(&buf, "&gt;");
2726
0
        break;
2727
0
      case '\r':
2728
0
        appendStringInfoString(&buf, "&#x0d;");
2729
0
        break;
2730
0
      default:
2731
0
        appendStringInfoCharMacro(&buf, *p);
2732
0
        break;
2733
0
    }
2734
0
  }
2735
0
  return buf.data;
2736
0
}
2737
2738
2739
static char *
_SPI_strdup(const char *s)
{
  /* Copy s, terminator included, into memory obtained from SPI_palloc() */
  size_t  nbytes = strlen(s) + 1;
  char   *copy;

  copy = SPI_palloc(nbytes);

  return memcpy(copy, s, nbytes);
}
2748
2749
2750
/*
2751
 * SQL to XML mapping functions
2752
 *
2753
 * What follows below was at one point intentionally organized so that
2754
 * you can read along in the SQL/XML standard. The functions are
2755
 * mostly split up the way the clauses lay out in the standards
2756
 * document, and the identifiers are also aligned with the standard
2757
 * text.  Unfortunately, SQL/XML:2006 reordered the clauses
2758
 * differently than SQL/XML:2003, so the order below doesn't make much
2759
 * sense anymore.
2760
 *
2761
 * There are many things going on there:
2762
 *
2763
 * There are two kinds of mappings: Mapping SQL data (table contents)
2764
 * to XML documents, and mapping SQL structure (the "schema") to XML
2765
 * Schema.  And there are functions that do both at the same time.
2766
 *
2767
 * Then you can map a database, a schema, or a table, each in both
2768
 * ways.  This breaks down recursively: Mapping a database invokes
2769
 * mapping schemas, which invokes mapping tables, which invokes
2770
 * mapping rows, which invokes mapping columns, although you can't
2771
 * call the last two from the outside.  Because of this, there are a
2772
 * number of xyz_internal() functions which are to be called both from
2773
 * the function manager wrapper and from some upper layer in a
2774
 * recursive call.
2775
 *
2776
 * See the documentation about what the common function arguments
2777
 * nulls, tableforest, and targetns mean.
2778
 *
2779
 * Some style guidelines for XML output: Use double quotes for quoting
2780
 * XML attributes.  Indent XML elements by two spaces, but remember
2781
 * that a lot of code is called recursively at different levels, so
2782
 * it's better not to indent rather than create output that indents
2783
 * and outdents weirdly.  Add newlines to make the output look nice.
2784
 */
2785
2786
2787
/*
2788
 * Visibility of objects for XML mappings; see SQL/XML:2008 section
2789
 * 4.10.8.
2790
 */
2791
2792
/*
2793
 * Given a query, which must return type oid as first column, produce
2794
 * a list of Oids with the query results.
2795
 */
2796
static List *
2797
query_to_oid_list(const char *query)
2798
0
{
2799
0
  uint64    i;
2800
0
  List     *list = NIL;
2801
0
  int     spi_result;
2802
2803
0
  spi_result = SPI_execute(query, true, 0);
2804
0
  if (spi_result != SPI_OK_SELECT)
2805
0
    elog(ERROR, "SPI_execute returned %s for %s",
2806
0
       SPI_result_code_string(spi_result), query);
2807
2808
0
  for (i = 0; i < SPI_processed; i++)
2809
0
  {
2810
0
    Datum   oid;
2811
0
    bool    isnull;
2812
2813
0
    oid = SPI_getbinval(SPI_tuptable->vals[i],
2814
0
              SPI_tuptable->tupdesc,
2815
0
              1,
2816
0
              &isnull);
2817
0
    if (!isnull)
2818
0
      list = lappend_oid(list, DatumGetObjectId(oid));
2819
0
  }
2820
2821
0
  return list;
2822
0
}
2823
2824
2825
static List *
2826
schema_get_xml_visible_tables(Oid nspid)
2827
0
{
2828
0
  StringInfoData query;
2829
2830
0
  initStringInfo(&query);
2831
0
  appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class"
2832
0
           " WHERE relnamespace = %u AND relkind IN ("
2833
0
           CppAsString2(RELKIND_RELATION) ","
2834
0
           CppAsString2(RELKIND_MATVIEW) ","
2835
0
           CppAsString2(RELKIND_VIEW) ")"
2836
0
           " AND pg_catalog.has_table_privilege (oid, 'SELECT')"
2837
0
           " ORDER BY relname;", nspid);
2838
2839
0
  return query_to_oid_list(query.data);
2840
0
}
2841
2842
2843
/*
2844
 * Including the system schemas is probably not useful for a database
2845
 * mapping.
2846
 */
2847
#define XML_VISIBLE_SCHEMAS_EXCLUDE "(nspname ~ '^pg_' OR nspname = 'information_schema')"
2848
2849
0
#define XML_VISIBLE_SCHEMAS "SELECT oid FROM pg_catalog.pg_namespace WHERE pg_catalog.has_schema_privilege (oid, 'USAGE') AND NOT " XML_VISIBLE_SCHEMAS_EXCLUDE
2850
2851
2852
static List *
2853
database_get_xml_visible_schemas(void)
2854
0
{
2855
0
  return query_to_oid_list(XML_VISIBLE_SCHEMAS " ORDER BY nspname;");
2856
0
}
2857
2858
2859
static List *
2860
database_get_xml_visible_tables(void)
2861
0
{
2862
  /* At the moment there is no order required here. */
2863
0
  return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class"
2864
0
               " WHERE relkind IN ("
2865
0
               CppAsString2(RELKIND_RELATION) ","
2866
0
               CppAsString2(RELKIND_MATVIEW) ","
2867
0
               CppAsString2(RELKIND_VIEW) ")"
2868
0
               " AND pg_catalog.has_table_privilege(pg_class.oid, 'SELECT')"
2869
0
               " AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");");
2870
0
}
2871
2872
2873
/*
2874
 * Map SQL table to XML and/or XML Schema document; see SQL/XML:2008
2875
 * section 9.11.
2876
 */
2877
2878
static StringInfo
2879
table_to_xml_internal(Oid relid,
2880
            const char *xmlschema, bool nulls, bool tableforest,
2881
            const char *targetns, bool top_level)
2882
0
{
2883
0
  StringInfoData query;
2884
2885
0
  initStringInfo(&query);
2886
0
  appendStringInfo(&query, "SELECT * FROM %s",
2887
0
           DatumGetCString(DirectFunctionCall1(regclassout,
2888
0
                             ObjectIdGetDatum(relid))));
2889
0
  return query_to_xml_internal(query.data, get_rel_name(relid),
2890
0
                 xmlschema, nulls, tableforest,
2891
0
                 targetns, top_level);
2892
0
}
2893
2894
2895
Datum
2896
table_to_xml(PG_FUNCTION_ARGS)
2897
0
{
2898
0
  Oid     relid = PG_GETARG_OID(0);
2899
0
  bool    nulls = PG_GETARG_BOOL(1);
2900
0
  bool    tableforest = PG_GETARG_BOOL(2);
2901
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2902
2903
0
  PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, NULL,
2904
0
                                nulls, tableforest,
2905
0
                                targetns, true)));
2906
0
}
2907
2908
2909
Datum
2910
query_to_xml(PG_FUNCTION_ARGS)
2911
0
{
2912
0
  char     *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2913
0
  bool    nulls = PG_GETARG_BOOL(1);
2914
0
  bool    tableforest = PG_GETARG_BOOL(2);
2915
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2916
2917
0
  PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2918
0
                                NULL, nulls, tableforest,
2919
0
                                targetns, true)));
2920
0
}
2921
2922
2923
Datum
2924
cursor_to_xml(PG_FUNCTION_ARGS)
2925
0
{
2926
0
  char     *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2927
0
  int32   count = PG_GETARG_INT32(1);
2928
0
  bool    nulls = PG_GETARG_BOOL(2);
2929
0
  bool    tableforest = PG_GETARG_BOOL(3);
2930
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(4));
2931
2932
0
  StringInfoData result;
2933
0
  Portal    portal;
2934
0
  uint64    i;
2935
2936
0
  initStringInfo(&result);
2937
2938
0
  if (!tableforest)
2939
0
  {
2940
0
    xmldata_root_element_start(&result, "table", NULL, targetns, true);
2941
0
    appendStringInfoChar(&result, '\n');
2942
0
  }
2943
2944
0
  SPI_connect();
2945
0
  portal = SPI_cursor_find(name);
2946
0
  if (portal == NULL)
2947
0
    ereport(ERROR,
2948
0
        (errcode(ERRCODE_UNDEFINED_CURSOR),
2949
0
         errmsg("cursor \"%s\" does not exist", name)));
2950
2951
0
  SPI_cursor_fetch(portal, true, count);
2952
0
  for (i = 0; i < SPI_processed; i++)
2953
0
    SPI_sql_row_to_xmlelement(i, &result, NULL, nulls,
2954
0
                  tableforest, targetns, true);
2955
2956
0
  SPI_finish();
2957
2958
0
  if (!tableforest)
2959
0
    xmldata_root_element_end(&result, "table");
2960
2961
0
  PG_RETURN_XML_P(stringinfo_to_xmltype(&result));
2962
0
}
2963
2964
2965
/*
2966
 * Write the start tag of the root element of a data mapping.
2967
 *
2968
 * top_level means that this is the very top level of the eventual
2969
 * output.  For example, when the user calls table_to_xml, then a call
2970
 * with a table name to this function is the top level.  When the user
2971
 * calls database_to_xml, then a call with a schema name to this
2972
 * function is not the top level.  If top_level is false, then the XML
2973
 * namespace declarations are omitted, because they supposedly already
2974
 * appeared earlier in the output.  Repeating them is not wrong, but
2975
 * it looks ugly.
2976
 */
2977
static void
2978
xmldata_root_element_start(StringInfo result, const char *eltname,
2979
               const char *xmlschema, const char *targetns,
2980
               bool top_level)
2981
0
{
2982
  /* This isn't really wrong but currently makes no sense. */
2983
0
  Assert(top_level || !xmlschema);
2984
2985
0
  appendStringInfo(result, "<%s", eltname);
2986
0
  if (top_level)
2987
0
  {
2988
0
    appendStringInfoString(result, " xmlns:xsi=\"" NAMESPACE_XSI "\"");
2989
0
    if (strlen(targetns) > 0)
2990
0
      appendStringInfo(result, " xmlns=\"%s\"", targetns);
2991
0
  }
2992
0
  if (xmlschema)
2993
0
  {
2994
    /* FIXME: better targets */
2995
0
    if (strlen(targetns) > 0)
2996
0
      appendStringInfo(result, " xsi:schemaLocation=\"%s #\"", targetns);
2997
0
    else
2998
0
      appendStringInfoString(result, " xsi:noNamespaceSchemaLocation=\"#\"");
2999
0
  }
3000
0
  appendStringInfoString(result, ">\n");
3001
0
}
3002
3003
3004
static void
3005
xmldata_root_element_end(StringInfo result, const char *eltname)
3006
0
{
3007
0
  appendStringInfo(result, "</%s>\n", eltname);
3008
0
}
3009
3010
3011
static StringInfo
3012
query_to_xml_internal(const char *query, char *tablename,
3013
            const char *xmlschema, bool nulls, bool tableforest,
3014
            const char *targetns, bool top_level)
3015
0
{
3016
0
  StringInfo  result;
3017
0
  char     *xmltn;
3018
0
  uint64    i;
3019
3020
0
  if (tablename)
3021
0
    xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
3022
0
  else
3023
0
    xmltn = "table";
3024
3025
0
  result = makeStringInfo();
3026
3027
0
  SPI_connect();
3028
0
  if (SPI_execute(query, true, 0) != SPI_OK_SELECT)
3029
0
    ereport(ERROR,
3030
0
        (errcode(ERRCODE_DATA_EXCEPTION),
3031
0
         errmsg("invalid query")));
3032
3033
0
  if (!tableforest)
3034
0
  {
3035
0
    xmldata_root_element_start(result, xmltn, xmlschema,
3036
0
                   targetns, top_level);
3037
0
    appendStringInfoChar(result, '\n');
3038
0
  }
3039
3040
0
  if (xmlschema)
3041
0
    appendStringInfo(result, "%s\n\n", xmlschema);
3042
3043
0
  for (i = 0; i < SPI_processed; i++)
3044
0
    SPI_sql_row_to_xmlelement(i, result, tablename, nulls,
3045
0
                  tableforest, targetns, top_level);
3046
3047
0
  if (!tableforest)
3048
0
    xmldata_root_element_end(result, xmltn);
3049
3050
0
  SPI_finish();
3051
3052
0
  return result;
3053
0
}
3054
3055
3056
Datum
3057
table_to_xmlschema(PG_FUNCTION_ARGS)
3058
0
{
3059
0
  Oid     relid = PG_GETARG_OID(0);
3060
0
  bool    nulls = PG_GETARG_BOOL(1);
3061
0
  bool    tableforest = PG_GETARG_BOOL(2);
3062
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3063
0
  const char *result;
3064
0
  Relation  rel;
3065
3066
0
  rel = table_open(relid, AccessShareLock);
3067
0
  result = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
3068
0
                    tableforest, targetns);
3069
0
  table_close(rel, NoLock);
3070
3071
0
  PG_RETURN_XML_P(cstring_to_xmltype(result));
3072
0
}
3073
3074
3075
Datum
3076
query_to_xmlschema(PG_FUNCTION_ARGS)
3077
0
{
3078
0
  char     *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
3079
0
  bool    nulls = PG_GETARG_BOOL(1);
3080
0
  bool    tableforest = PG_GETARG_BOOL(2);
3081
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3082
0
  const char *result;
3083
0
  SPIPlanPtr  plan;
3084
0
  Portal    portal;
3085
3086
0
  SPI_connect();
3087
3088
0
  if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
3089
0
    elog(ERROR, "SPI_prepare(\"%s\") failed", query);
3090
3091
0
  if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
3092
0
    elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
3093
3094
0
  result = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
3095
0
                          InvalidOid, nulls,
3096
0
                          tableforest, targetns));
3097
0
  SPI_cursor_close(portal);
3098
0
  SPI_finish();
3099
3100
0
  PG_RETURN_XML_P(cstring_to_xmltype(result));
3101
0
}
3102
3103
3104
Datum
3105
cursor_to_xmlschema(PG_FUNCTION_ARGS)
3106
0
{
3107
0
  char     *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
3108
0
  bool    nulls = PG_GETARG_BOOL(1);
3109
0
  bool    tableforest = PG_GETARG_BOOL(2);
3110
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3111
0
  const char *xmlschema;
3112
0
  Portal    portal;
3113
3114
0
  SPI_connect();
3115
0
  portal = SPI_cursor_find(name);
3116
0
  if (portal == NULL)
3117
0
    ereport(ERROR,
3118
0
        (errcode(ERRCODE_UNDEFINED_CURSOR),
3119
0
         errmsg("cursor \"%s\" does not exist", name)));
3120
0
  if (portal->tupDesc == NULL)
3121
0
    ereport(ERROR,
3122
0
        (errcode(ERRCODE_INVALID_CURSOR_STATE),
3123
0
         errmsg("portal \"%s\" does not return tuples", name)));
3124
3125
0
  xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
3126
0
                             InvalidOid, nulls,
3127
0
                             tableforest, targetns));
3128
0
  SPI_finish();
3129
3130
0
  PG_RETURN_XML_P(cstring_to_xmltype(xmlschema));
3131
0
}
3132
3133
3134
Datum
3135
table_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3136
0
{
3137
0
  Oid     relid = PG_GETARG_OID(0);
3138
0
  bool    nulls = PG_GETARG_BOOL(1);
3139
0
  bool    tableforest = PG_GETARG_BOOL(2);
3140
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3141
0
  Relation  rel;
3142
0
  const char *xmlschema;
3143
3144
0
  rel = table_open(relid, AccessShareLock);
3145
0
  xmlschema = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
3146
0
                       tableforest, targetns);
3147
0
  table_close(rel, NoLock);
3148
3149
0
  PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid,
3150
0
                                xmlschema, nulls, tableforest,
3151
0
                                targetns, true)));
3152
0
}
3153
3154
3155
Datum
3156
query_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3157
0
{
3158
0
  char     *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
3159
0
  bool    nulls = PG_GETARG_BOOL(1);
3160
0
  bool    tableforest = PG_GETARG_BOOL(2);
3161
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3162
3163
0
  const char *xmlschema;
3164
0
  SPIPlanPtr  plan;
3165
0
  Portal    portal;
3166
3167
0
  SPI_connect();
3168
3169
0
  if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
3170
0
    elog(ERROR, "SPI_prepare(\"%s\") failed", query);
3171
3172
0
  if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
3173
0
    elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
3174
3175
0
  xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
3176
0
                             InvalidOid, nulls, tableforest, targetns));
3177
0
  SPI_cursor_close(portal);
3178
0
  SPI_finish();
3179
3180
0
  PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
3181
0
                                xmlschema, nulls, tableforest,
3182
0
                                targetns, true)));
3183
0
}
3184
3185
3186
/*
3187
 * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2008
3188
 * sections 9.13, 9.14.
3189
 */
3190
3191
static StringInfo
3192
schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls,
3193
             bool tableforest, const char *targetns, bool top_level)
3194
0
{
3195
0
  StringInfo  result;
3196
0
  char     *xmlsn;
3197
0
  List     *relid_list;
3198
0
  ListCell   *cell;
3199
3200
0
  xmlsn = map_sql_identifier_to_xml_name(get_namespace_name(nspid),
3201
0
                       true, false);
3202
0
  result = makeStringInfo();
3203
3204
0
  xmldata_root_element_start(result, xmlsn, xmlschema, targetns, top_level);
3205
0
  appendStringInfoChar(result, '\n');
3206
3207
0
  if (xmlschema)
3208
0
    appendStringInfo(result, "%s\n\n", xmlschema);
3209
3210
0
  SPI_connect();
3211
3212
0
  relid_list = schema_get_xml_visible_tables(nspid);
3213
3214
0
  foreach(cell, relid_list)
3215
0
  {
3216
0
    Oid     relid = lfirst_oid(cell);
3217
0
    StringInfo  subres;
3218
3219
0
    subres = table_to_xml_internal(relid, NULL, nulls, tableforest,
3220
0
                     targetns, false);
3221
3222
0
    appendBinaryStringInfo(result, subres->data, subres->len);
3223
0
    appendStringInfoChar(result, '\n');
3224
0
  }
3225
3226
0
  SPI_finish();
3227
3228
0
  xmldata_root_element_end(result, xmlsn);
3229
3230
0
  return result;
3231
0
}
3232
3233
3234
Datum
3235
schema_to_xml(PG_FUNCTION_ARGS)
3236
0
{
3237
0
  Name    name = PG_GETARG_NAME(0);
3238
0
  bool    nulls = PG_GETARG_BOOL(1);
3239
0
  bool    tableforest = PG_GETARG_BOOL(2);
3240
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3241
3242
0
  char     *schemaname;
3243
0
  Oid     nspid;
3244
3245
0
  schemaname = NameStr(*name);
3246
0
  nspid = LookupExplicitNamespace(schemaname, false);
3247
3248
0
  PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, NULL,
3249
0
                                 nulls, tableforest, targetns, true)));
3250
0
}
3251
3252
3253
/*
3254
 * Write the start element of the root element of an XML Schema mapping.
3255
 */
3256
static void
3257
xsd_schema_element_start(StringInfo result, const char *targetns)
3258
0
{
3259
0
  appendStringInfoString(result,
3260
0
               "<xsd:schema\n"
3261
0
               "    xmlns:xsd=\"" NAMESPACE_XSD "\"");
3262
0
  if (strlen(targetns) > 0)
3263
0
    appendStringInfo(result,
3264
0
             "\n"
3265
0
             "    targetNamespace=\"%s\"\n"
3266
0
             "    elementFormDefault=\"qualified\"",
3267
0
             targetns);
3268
0
  appendStringInfoString(result,
3269
0
               ">\n\n");
3270
0
}
3271
3272
3273
static void
3274
xsd_schema_element_end(StringInfo result)
3275
0
{
3276
0
  appendStringInfoString(result, "</xsd:schema>");
3277
0
}
3278
3279
3280
static StringInfo
3281
schema_to_xmlschema_internal(const char *schemaname, bool nulls,
3282
               bool tableforest, const char *targetns)
3283
0
{
3284
0
  Oid     nspid;
3285
0
  List     *relid_list;
3286
0
  List     *tupdesc_list;
3287
0
  ListCell   *cell;
3288
0
  StringInfo  result;
3289
3290
0
  result = makeStringInfo();
3291
3292
0
  nspid = LookupExplicitNamespace(schemaname, false);
3293
3294
0
  xsd_schema_element_start(result, targetns);
3295
3296
0
  SPI_connect();
3297
3298
0
  relid_list = schema_get_xml_visible_tables(nspid);
3299
3300
0
  tupdesc_list = NIL;
3301
0
  foreach(cell, relid_list)
3302
0
  {
3303
0
    Relation  rel;
3304
3305
0
    rel = table_open(lfirst_oid(cell), AccessShareLock);
3306
0
    tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
3307
0
    table_close(rel, NoLock);
3308
0
  }
3309
3310
0
  appendStringInfoString(result,
3311
0
               map_sql_typecoll_to_xmlschema_types(tupdesc_list));
3312
3313
0
  appendStringInfoString(result,
3314
0
               map_sql_schema_to_xmlschema_types(nspid, relid_list,
3315
0
                               nulls, tableforest, targetns));
3316
3317
0
  xsd_schema_element_end(result);
3318
3319
0
  SPI_finish();
3320
3321
0
  return result;
3322
0
}
3323
3324
3325
Datum
3326
schema_to_xmlschema(PG_FUNCTION_ARGS)
3327
0
{
3328
0
  Name    name = PG_GETARG_NAME(0);
3329
0
  bool    nulls = PG_GETARG_BOOL(1);
3330
0
  bool    tableforest = PG_GETARG_BOOL(2);
3331
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3332
3333
0
  PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xmlschema_internal(NameStr(*name),
3334
0
                                     nulls, tableforest, targetns)));
3335
0
}
3336
3337
3338
Datum
3339
schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3340
0
{
3341
0
  Name    name = PG_GETARG_NAME(0);
3342
0
  bool    nulls = PG_GETARG_BOOL(1);
3343
0
  bool    tableforest = PG_GETARG_BOOL(2);
3344
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3345
0
  char     *schemaname;
3346
0
  Oid     nspid;
3347
0
  StringInfo  xmlschema;
3348
3349
0
  schemaname = NameStr(*name);
3350
0
  nspid = LookupExplicitNamespace(schemaname, false);
3351
3352
0
  xmlschema = schema_to_xmlschema_internal(schemaname, nulls,
3353
0
                       tableforest, targetns);
3354
3355
0
  PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid,
3356
0
                                 xmlschema->data, nulls,
3357
0
                                 tableforest, targetns, true)));
3358
0
}
3359
3360
3361
/*
3362
 * Map SQL database to XML and/or XML Schema document; see SQL/XML:2008
3363
 * sections 9.16, 9.17.
3364
 */
3365
3366
static StringInfo
3367
database_to_xml_internal(const char *xmlschema, bool nulls,
3368
             bool tableforest, const char *targetns)
3369
0
{
3370
0
  StringInfo  result;
3371
0
  List     *nspid_list;
3372
0
  ListCell   *cell;
3373
0
  char     *xmlcn;
3374
3375
0
  xmlcn = map_sql_identifier_to_xml_name(get_database_name(MyDatabaseId),
3376
0
                       true, false);
3377
0
  result = makeStringInfo();
3378
3379
0
  xmldata_root_element_start(result, xmlcn, xmlschema, targetns, true);
3380
0
  appendStringInfoChar(result, '\n');
3381
3382
0
  if (xmlschema)
3383
0
    appendStringInfo(result, "%s\n\n", xmlschema);
3384
3385
0
  SPI_connect();
3386
3387
0
  nspid_list = database_get_xml_visible_schemas();
3388
3389
0
  foreach(cell, nspid_list)
3390
0
  {
3391
0
    Oid     nspid = lfirst_oid(cell);
3392
0
    StringInfo  subres;
3393
3394
0
    subres = schema_to_xml_internal(nspid, NULL, nulls,
3395
0
                    tableforest, targetns, false);
3396
3397
0
    appendBinaryStringInfo(result, subres->data, subres->len);
3398
0
    appendStringInfoChar(result, '\n');
3399
0
  }
3400
3401
0
  SPI_finish();
3402
3403
0
  xmldata_root_element_end(result, xmlcn);
3404
3405
0
  return result;
3406
0
}
3407
3408
3409
Datum
3410
database_to_xml(PG_FUNCTION_ARGS)
3411
0
{
3412
0
  bool    nulls = PG_GETARG_BOOL(0);
3413
0
  bool    tableforest = PG_GETARG_BOOL(1);
3414
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3415
3416
0
  PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(NULL, nulls,
3417
0
                                   tableforest, targetns)));
3418
0
}
3419
3420
3421
static StringInfo
3422
database_to_xmlschema_internal(bool nulls, bool tableforest,
3423
                 const char *targetns)
3424
0
{
3425
0
  List     *relid_list;
3426
0
  List     *nspid_list;
3427
0
  List     *tupdesc_list;
3428
0
  ListCell   *cell;
3429
0
  StringInfo  result;
3430
3431
0
  result = makeStringInfo();
3432
3433
0
  xsd_schema_element_start(result, targetns);
3434
3435
0
  SPI_connect();
3436
3437
0
  relid_list = database_get_xml_visible_tables();
3438
0
  nspid_list = database_get_xml_visible_schemas();
3439
3440
0
  tupdesc_list = NIL;
3441
0
  foreach(cell, relid_list)
3442
0
  {
3443
0
    Relation  rel;
3444
3445
0
    rel = table_open(lfirst_oid(cell), AccessShareLock);
3446
0
    tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
3447
0
    table_close(rel, NoLock);
3448
0
  }
3449
3450
0
  appendStringInfoString(result,
3451
0
               map_sql_typecoll_to_xmlschema_types(tupdesc_list));
3452
3453
0
  appendStringInfoString(result,
3454
0
               map_sql_catalog_to_xmlschema_types(nspid_list, nulls, tableforest, targetns));
3455
3456
0
  xsd_schema_element_end(result);
3457
3458
0
  SPI_finish();
3459
3460
0
  return result;
3461
0
}
3462
3463
3464
Datum
3465
database_to_xmlschema(PG_FUNCTION_ARGS)
3466
0
{
3467
0
  bool    nulls = PG_GETARG_BOOL(0);
3468
0
  bool    tableforest = PG_GETARG_BOOL(1);
3469
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3470
3471
0
  PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xmlschema_internal(nulls,
3472
0
                                     tableforest, targetns)));
3473
0
}
3474
3475
3476
Datum
3477
database_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3478
0
{
3479
0
  bool    nulls = PG_GETARG_BOOL(0);
3480
0
  bool    tableforest = PG_GETARG_BOOL(1);
3481
0
  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3482
0
  StringInfo  xmlschema;
3483
3484
0
  xmlschema = database_to_xmlschema_internal(nulls, tableforest, targetns);
3485
3486
0
  PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(xmlschema->data,
3487
0
                                   nulls, tableforest, targetns)));
3488
0
}
3489
3490
3491
/*
3492
 * Map a multi-part SQL name to an XML name; see SQL/XML:2008 section
3493
 * 9.2.
3494
 */
3495
static char *
3496
map_multipart_sql_identifier_to_xml_name(const char *a, const char *b, const char *c, const char *d)
3497
0
{
3498
0
  StringInfoData result;
3499
3500
0
  initStringInfo(&result);
3501
3502
0
  if (a)
3503
0
    appendStringInfoString(&result,
3504
0
                 map_sql_identifier_to_xml_name(a, true, true));
3505
0
  if (b)
3506
0
    appendStringInfo(&result, ".%s",
3507
0
             map_sql_identifier_to_xml_name(b, true, true));
3508
0
  if (c)
3509
0
    appendStringInfo(&result, ".%s",
3510
0
             map_sql_identifier_to_xml_name(c, true, true));
3511
0
  if (d)
3512
0
    appendStringInfo(&result, ".%s",
3513
0
             map_sql_identifier_to_xml_name(d, true, true));
3514
3515
0
  return result.data;
3516
0
}
3517
3518
3519
/*
3520
 * Map an SQL table to an XML Schema document; see SQL/XML:2008
3521
 * section 9.11.
3522
 *
3523
 * Map an SQL table to XML Schema data types; see SQL/XML:2008 section
3524
 * 9.9.
3525
 */
3526
static const char *
3527
map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, bool nulls,
3528
               bool tableforest, const char *targetns)
3529
0
{
3530
0
  int     i;
3531
0
  char     *xmltn;
3532
0
  char     *tabletypename;
3533
0
  char     *rowtypename;
3534
0
  StringInfoData result;
3535
3536
0
  initStringInfo(&result);
3537
3538
0
  if (OidIsValid(relid))
3539
0
  {
3540
0
    HeapTuple tuple;
3541
0
    Form_pg_class reltuple;
3542
3543
0
    tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
3544
0
    if (!HeapTupleIsValid(tuple))
3545
0
      elog(ERROR, "cache lookup failed for relation %u", relid);
3546
0
    reltuple = (Form_pg_class) GETSTRUCT(tuple);
3547
3548
0
    xmltn = map_sql_identifier_to_xml_name(NameStr(reltuple->relname),
3549
0
                         true, false);
3550
3551
0
    tabletypename = map_multipart_sql_identifier_to_xml_name("TableType",
3552
0
                                 get_database_name(MyDatabaseId),
3553
0
                                 get_namespace_name(reltuple->relnamespace),
3554
0
                                 NameStr(reltuple->relname));
3555
3556
0
    rowtypename = map_multipart_sql_identifier_to_xml_name("RowType",
3557
0
                                 get_database_name(MyDatabaseId),
3558
0
                                 get_namespace_name(reltuple->relnamespace),
3559
0
                                 NameStr(reltuple->relname));
3560
3561
0
    ReleaseSysCache(tuple);
3562
0
  }
3563
0
  else
3564
0
  {
3565
0
    if (tableforest)
3566
0
      xmltn = "row";
3567
0
    else
3568
0
      xmltn = "table";
3569
3570
0
    tabletypename = "TableType";
3571
0
    rowtypename = "RowType";
3572
0
  }
3573
3574
0
  xsd_schema_element_start(&result, targetns);
3575
3576
0
  appendStringInfoString(&result,
3577
0
               map_sql_typecoll_to_xmlschema_types(list_make1(tupdesc)));
3578
3579
0
  appendStringInfo(&result,
3580
0
           "<xsd:complexType name=\"%s\">\n"
3581
0
           "  <xsd:sequence>\n",
3582
0
           rowtypename);
3583
3584
0
  for (i = 0; i < tupdesc->natts; i++)
3585
0
  {
3586
0
    Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3587
3588
0
    if (att->attisdropped)
3589
0
      continue;
3590
0
    appendStringInfo(&result,
3591
0
             "    <xsd:element name=\"%s\" type=\"%s\"%s></xsd:element>\n",
3592
0
             map_sql_identifier_to_xml_name(NameStr(att->attname),
3593
0
                            true, false),
3594
0
             map_sql_type_to_xml_name(att->atttypid, -1),
3595
0
             nulls ? " nillable=\"true\"" : " minOccurs=\"0\"");
3596
0
  }
3597
3598
0
  appendStringInfoString(&result,
3599
0
               "  </xsd:sequence>\n"
3600
0
               "</xsd:complexType>\n\n");
3601
3602
0
  if (!tableforest)
3603
0
  {
3604
0
    appendStringInfo(&result,
3605
0
             "<xsd:complexType name=\"%s\">\n"
3606
0
             "  <xsd:sequence>\n"
3607
0
             "    <xsd:element name=\"row\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n"
3608
0
             "  </xsd:sequence>\n"
3609
0
             "</xsd:complexType>\n\n",
3610
0
             tabletypename, rowtypename);
3611
3612
0
    appendStringInfo(&result,
3613
0
             "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3614
0
             xmltn, tabletypename);
3615
0
  }
3616
0
  else
3617
0
    appendStringInfo(&result,
3618
0
             "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3619
0
             xmltn, rowtypename);
3620
3621
0
  xsd_schema_element_end(&result);
3622
3623
0
  return result.data;
3624
0
}
3625
3626
3627
/*
3628
 * Map an SQL schema to XML Schema data types; see SQL/XML:2008
3629
 * section 9.12.
3630
 */
3631
static const char *
3632
map_sql_schema_to_xmlschema_types(Oid nspid, List *relid_list, bool nulls,
3633
                  bool tableforest, const char *targetns)
3634
0
{
3635
0
  char     *dbname;
3636
0
  char     *nspname;
3637
0
  char     *xmlsn;
3638
0
  char     *schematypename;
3639
0
  StringInfoData result;
3640
0
  ListCell   *cell;
3641
3642
0
  dbname = get_database_name(MyDatabaseId);
3643
0
  nspname = get_namespace_name(nspid);
3644
3645
0
  initStringInfo(&result);
3646
3647
0
  xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3648
3649
0
  schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3650
0
                                dbname,
3651
0
                                nspname,
3652
0
                                NULL);
3653
3654
0
  appendStringInfo(&result,
3655
0
           "<xsd:complexType name=\"%s\">\n", schematypename);
3656
0
  if (!tableforest)
3657
0
    appendStringInfoString(&result,
3658
0
                 "  <xsd:all>\n");
3659
0
  else
3660
0
    appendStringInfoString(&result,
3661
0
                 "  <xsd:sequence>\n");
3662
3663
0
  foreach(cell, relid_list)
3664
0
  {
3665
0
    Oid     relid = lfirst_oid(cell);
3666
0
    char     *relname = get_rel_name(relid);
3667
0
    char     *xmltn = map_sql_identifier_to_xml_name(relname, true, false);
3668
0
    char     *tabletypename = map_multipart_sql_identifier_to_xml_name(tableforest ? "RowType" : "TableType",
3669
0
                                       dbname,
3670
0
                                       nspname,
3671
0
                                       relname);
3672
3673
0
    if (!tableforest)
3674
0
      appendStringInfo(&result,
3675
0
               "    <xsd:element name=\"%s\" type=\"%s\"/>\n",
3676
0
               xmltn, tabletypename);
3677
0
    else
3678
0
      appendStringInfo(&result,
3679
0
               "    <xsd:element name=\"%s\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n",
3680
0
               xmltn, tabletypename);
3681
0
  }
3682
3683
0
  if (!tableforest)
3684
0
    appendStringInfoString(&result,
3685
0
                 "  </xsd:all>\n");
3686
0
  else
3687
0
    appendStringInfoString(&result,
3688
0
                 "  </xsd:sequence>\n");
3689
0
  appendStringInfoString(&result,
3690
0
               "</xsd:complexType>\n\n");
3691
3692
0
  appendStringInfo(&result,
3693
0
           "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3694
0
           xmlsn, schematypename);
3695
3696
0
  return result.data;
3697
0
}
3698
3699
3700
/*
3701
 * Map an SQL catalog to XML Schema data types; see SQL/XML:2008
3702
 * section 9.15.
3703
 */
3704
static const char *
3705
map_sql_catalog_to_xmlschema_types(List *nspid_list, bool nulls,
3706
                   bool tableforest, const char *targetns)
3707
0
{
3708
0
  char     *dbname;
3709
0
  char     *xmlcn;
3710
0
  char     *catalogtypename;
3711
0
  StringInfoData result;
3712
0
  ListCell   *cell;
3713
3714
0
  dbname = get_database_name(MyDatabaseId);
3715
3716
0
  initStringInfo(&result);
3717
3718
0
  xmlcn = map_sql_identifier_to_xml_name(dbname, true, false);
3719
3720
0
  catalogtypename = map_multipart_sql_identifier_to_xml_name("CatalogType",
3721
0
                                 dbname,
3722
0
                                 NULL,
3723
0
                                 NULL);
3724
3725
0
  appendStringInfo(&result,
3726
0
           "<xsd:complexType name=\"%s\">\n", catalogtypename);
3727
0
  appendStringInfoString(&result,
3728
0
               "  <xsd:all>\n");
3729
3730
0
  foreach(cell, nspid_list)
3731
0
  {
3732
0
    Oid     nspid = lfirst_oid(cell);
3733
0
    char     *nspname = get_namespace_name(nspid);
3734
0
    char     *xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3735
0
    char     *schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3736
0
                                        dbname,
3737
0
                                        nspname,
3738
0
                                        NULL);
3739
3740
0
    appendStringInfo(&result,
3741
0
             "    <xsd:element name=\"%s\" type=\"%s\"/>\n",
3742
0
             xmlsn, schematypename);
3743
0
  }
3744
3745
0
  appendStringInfoString(&result,
3746
0
               "  </xsd:all>\n");
3747
0
  appendStringInfoString(&result,
3748
0
               "</xsd:complexType>\n\n");
3749
3750
0
  appendStringInfo(&result,
3751
0
           "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3752
0
           xmlcn, catalogtypename);
3753
3754
0
  return result.data;
3755
0
}
3756
3757
3758
/*
3759
 * Map an SQL data type to an XML name; see SQL/XML:2008 section 9.4.
3760
 */
3761
static const char *
3762
map_sql_type_to_xml_name(Oid typeoid, int typmod)
3763
0
{
3764
0
  StringInfoData result;
3765
3766
0
  initStringInfo(&result);
3767
3768
0
  switch (typeoid)
3769
0
  {
3770
0
    case BPCHAROID:
3771
0
      if (typmod == -1)
3772
0
        appendStringInfoString(&result, "CHAR");
3773
0
      else
3774
0
        appendStringInfo(&result, "CHAR_%d", typmod - VARHDRSZ);
3775
0
      break;
3776
0
    case VARCHAROID:
3777
0
      if (typmod == -1)
3778
0
        appendStringInfoString(&result, "VARCHAR");
3779
0
      else
3780
0
        appendStringInfo(&result, "VARCHAR_%d", typmod - VARHDRSZ);
3781
0
      break;
3782
0
    case NUMERICOID:
3783
0
      if (typmod == -1)
3784
0
        appendStringInfoString(&result, "NUMERIC");
3785
0
      else
3786
0
        appendStringInfo(&result, "NUMERIC_%d_%d",
3787
0
                 ((typmod - VARHDRSZ) >> 16) & 0xffff,
3788
0
                 (typmod - VARHDRSZ) & 0xffff);
3789
0
      break;
3790
0
    case INT4OID:
3791
0
      appendStringInfoString(&result, "INTEGER");
3792
0
      break;
3793
0
    case INT2OID:
3794
0
      appendStringInfoString(&result, "SMALLINT");
3795
0
      break;
3796
0
    case INT8OID:
3797
0
      appendStringInfoString(&result, "BIGINT");
3798
0
      break;
3799
0
    case FLOAT4OID:
3800
0
      appendStringInfoString(&result, "REAL");
3801
0
      break;
3802
0
    case FLOAT8OID:
3803
0
      appendStringInfoString(&result, "DOUBLE");
3804
0
      break;
3805
0
    case BOOLOID:
3806
0
      appendStringInfoString(&result, "BOOLEAN");
3807
0
      break;
3808
0
    case TIMEOID:
3809
0
      if (typmod == -1)
3810
0
        appendStringInfoString(&result, "TIME");
3811
0
      else
3812
0
        appendStringInfo(&result, "TIME_%d", typmod);
3813
0
      break;
3814
0
    case TIMETZOID:
3815
0
      if (typmod == -1)
3816
0
        appendStringInfoString(&result, "TIME_WTZ");
3817
0
      else
3818
0
        appendStringInfo(&result, "TIME_WTZ_%d", typmod);
3819
0
      break;
3820
0
    case TIMESTAMPOID:
3821
0
      if (typmod == -1)
3822
0
        appendStringInfoString(&result, "TIMESTAMP");
3823
0
      else
3824
0
        appendStringInfo(&result, "TIMESTAMP_%d", typmod);
3825
0
      break;
3826
0
    case TIMESTAMPTZOID:
3827
0
      if (typmod == -1)
3828
0
        appendStringInfoString(&result, "TIMESTAMP_WTZ");
3829
0
      else
3830
0
        appendStringInfo(&result, "TIMESTAMP_WTZ_%d", typmod);
3831
0
      break;
3832
0
    case DATEOID:
3833
0
      appendStringInfoString(&result, "DATE");
3834
0
      break;
3835
0
    case XMLOID:
3836
0
      appendStringInfoString(&result, "XML");
3837
0
      break;
3838
0
    default:
3839
0
      {
3840
0
        HeapTuple tuple;
3841
0
        Form_pg_type typtuple;
3842
3843
0
        tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid));
3844
0
        if (!HeapTupleIsValid(tuple))
3845
0
          elog(ERROR, "cache lookup failed for type %u", typeoid);
3846
0
        typtuple = (Form_pg_type) GETSTRUCT(tuple);
3847
3848
0
        appendStringInfoString(&result,
3849
0
                     map_multipart_sql_identifier_to_xml_name((typtuple->typtype == TYPTYPE_DOMAIN) ? "Domain" : "UDT",
3850
0
                                        get_database_name(MyDatabaseId),
3851
0
                                        get_namespace_name(typtuple->typnamespace),
3852
0
                                        NameStr(typtuple->typname)));
3853
3854
0
        ReleaseSysCache(tuple);
3855
0
      }
3856
0
  }
3857
3858
0
  return result.data;
3859
0
}
3860
3861
3862
/*
3863
 * Map a collection of SQL data types to XML Schema data types; see
3864
 * SQL/XML:2008 section 9.7.
3865
 */
3866
static const char *
3867
map_sql_typecoll_to_xmlschema_types(List *tupdesc_list)
3868
0
{
3869
0
  List     *uniquetypes = NIL;
3870
0
  int     i;
3871
0
  StringInfoData result;
3872
0
  ListCell   *cell0;
3873
3874
  /* extract all column types used in the set of TupleDescs */
3875
0
  foreach(cell0, tupdesc_list)
3876
0
  {
3877
0
    TupleDesc tupdesc = (TupleDesc) lfirst(cell0);
3878
3879
0
    for (i = 0; i < tupdesc->natts; i++)
3880
0
    {
3881
0
      Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3882
3883
0
      if (att->attisdropped)
3884
0
        continue;
3885
0
      uniquetypes = list_append_unique_oid(uniquetypes, att->atttypid);
3886
0
    }
3887
0
  }
3888
3889
  /* add base types of domains */
3890
0
  foreach(cell0, uniquetypes)
3891
0
  {
3892
0
    Oid     typid = lfirst_oid(cell0);
3893
0
    Oid     basetypid = getBaseType(typid);
3894
3895
0
    if (basetypid != typid)
3896
0
      uniquetypes = list_append_unique_oid(uniquetypes, basetypid);
3897
0
  }
3898
3899
  /* Convert to textual form */
3900
0
  initStringInfo(&result);
3901
3902
0
  foreach(cell0, uniquetypes)
3903
0
  {
3904
0
    appendStringInfo(&result, "%s\n",
3905
0
             map_sql_type_to_xmlschema_type(lfirst_oid(cell0),
3906
0
                            -1));
3907
0
  }
3908
3909
0
  return result.data;
3910
0
}
3911
3912
3913
/*
3914
 * Map an SQL data type to a named XML Schema data type; see
3915
 * SQL/XML:2008 sections 9.5 and 9.6.
3916
 *
3917
 * (The distinction between 9.5 and 9.6 is basically that 9.6 adds
3918
 * a name attribute, which this function does.  The name-less version
3919
 * 9.5 doesn't appear to be required anywhere.)
3920
 */
3921
static const char *
3922
map_sql_type_to_xmlschema_type(Oid typeoid, int typmod)
3923
0
{
3924
0
  StringInfoData result;
3925
0
  const char *typename = map_sql_type_to_xml_name(typeoid, typmod);
3926
3927
0
  initStringInfo(&result);
3928
3929
0
  if (typeoid == XMLOID)
3930
0
  {
3931
0
    appendStringInfoString(&result,
3932
0
                 "<xsd:complexType mixed=\"true\">\n"
3933
0
                 "  <xsd:sequence>\n"
3934
0
                 "    <xsd:any name=\"element\" minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"skip\"/>\n"
3935
0
                 "  </xsd:sequence>\n"
3936
0
                 "</xsd:complexType>\n");
3937
0
  }
3938
0
  else
3939
0
  {
3940
0
    appendStringInfo(&result,
3941
0
             "<xsd:simpleType name=\"%s\">\n", typename);
3942
3943
0
    switch (typeoid)
3944
0
    {
3945
0
      case BPCHAROID:
3946
0
      case VARCHAROID:
3947
0
      case TEXTOID:
3948
0
        appendStringInfoString(&result,
3949
0
                     "  <xsd:restriction base=\"xsd:string\">\n");
3950
0
        if (typmod != -1)
3951
0
          appendStringInfo(&result,
3952
0
                   "    <xsd:maxLength value=\"%d\"/>\n",
3953
0
                   typmod - VARHDRSZ);
3954
0
        appendStringInfoString(&result, "  </xsd:restriction>\n");
3955
0
        break;
3956
3957
0
      case BYTEAOID:
3958
0
        appendStringInfo(&result,
3959
0
                 "  <xsd:restriction base=\"xsd:%s\">\n"
3960
0
                 "  </xsd:restriction>\n",
3961
0
                 xmlbinary == XMLBINARY_BASE64 ? "base64Binary" : "hexBinary");
3962
0
        break;
3963
3964
0
      case NUMERICOID:
3965
0
        if (typmod != -1)
3966
0
          appendStringInfo(&result,
3967
0
                   "  <xsd:restriction base=\"xsd:decimal\">\n"
3968
0
                   "    <xsd:totalDigits value=\"%d\"/>\n"
3969
0
                   "    <xsd:fractionDigits value=\"%d\"/>\n"
3970
0
                   "  </xsd:restriction>\n",
3971
0
                   ((typmod - VARHDRSZ) >> 16) & 0xffff,
3972
0
                   (typmod - VARHDRSZ) & 0xffff);
3973
0
        break;
3974
3975
0
      case INT2OID:
3976
0
        appendStringInfo(&result,
3977
0
                 "  <xsd:restriction base=\"xsd:short\">\n"
3978
0
                 "    <xsd:maxInclusive value=\"%d\"/>\n"
3979
0
                 "    <xsd:minInclusive value=\"%d\"/>\n"
3980
0
                 "  </xsd:restriction>\n",
3981
0
                 SHRT_MAX, SHRT_MIN);
3982
0
        break;
3983
3984
0
      case INT4OID:
3985
0
        appendStringInfo(&result,
3986
0
                 "  <xsd:restriction base=\"xsd:int\">\n"
3987
0
                 "    <xsd:maxInclusive value=\"%d\"/>\n"
3988
0
                 "    <xsd:minInclusive value=\"%d\"/>\n"
3989
0
                 "  </xsd:restriction>\n",
3990
0
                 INT_MAX, INT_MIN);
3991
0
        break;
3992
3993
0
      case INT8OID:
3994
0
        appendStringInfo(&result,
3995
0
                 "  <xsd:restriction base=\"xsd:long\">\n"
3996
0
                 "    <xsd:maxInclusive value=\"" INT64_FORMAT "\"/>\n"
3997
0
                 "    <xsd:minInclusive value=\"" INT64_FORMAT "\"/>\n"
3998
0
                 "  </xsd:restriction>\n",
3999
0
                 PG_INT64_MAX,
4000
0
                 PG_INT64_MIN);
4001
0
        break;
4002
4003
0
      case FLOAT4OID:
4004
0
        appendStringInfoString(&result,
4005
0
                     "  <xsd:restriction base=\"xsd:float\"></xsd:restriction>\n");
4006
0
        break;
4007
4008
0
      case FLOAT8OID:
4009
0
        appendStringInfoString(&result,
4010
0
                     "  <xsd:restriction base=\"xsd:double\"></xsd:restriction>\n");
4011
0
        break;
4012
4013
0
      case BOOLOID:
4014
0
        appendStringInfoString(&result,
4015
0
                     "  <xsd:restriction base=\"xsd:boolean\"></xsd:restriction>\n");
4016
0
        break;
4017
4018
0
      case TIMEOID:
4019
0
      case TIMETZOID:
4020
0
        {
4021
0
          const char *tz = (typeoid == TIMETZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
4022
4023
0
          if (typmod == -1)
4024
0
            appendStringInfo(&result,
4025
0
                     "  <xsd:restriction base=\"xsd:time\">\n"
4026
0
                     "    <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
4027
0
                     "  </xsd:restriction>\n", tz);
4028
0
          else if (typmod == 0)
4029
0
            appendStringInfo(&result,
4030
0
                     "  <xsd:restriction base=\"xsd:time\">\n"
4031
0
                     "    <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
4032
0
                     "  </xsd:restriction>\n", tz);
4033
0
          else
4034
0
            appendStringInfo(&result,
4035
0
                     "  <xsd:restriction base=\"xsd:time\">\n"
4036
0
                     "    <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
4037
0
                     "  </xsd:restriction>\n", typmod - VARHDRSZ, tz);
4038
0
          break;
4039
0
        }
4040
4041
0
      case TIMESTAMPOID:
4042
0
      case TIMESTAMPTZOID:
4043
0
        {
4044
0
          const char *tz = (typeoid == TIMESTAMPTZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
4045
4046
0
          if (typmod == -1)
4047
0
            appendStringInfo(&result,
4048
0
                     "  <xsd:restriction base=\"xsd:dateTime\">\n"
4049
0
                     "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
4050
0
                     "  </xsd:restriction>\n", tz);
4051
0
          else if (typmod == 0)
4052
0
            appendStringInfo(&result,
4053
0
                     "  <xsd:restriction base=\"xsd:dateTime\">\n"
4054
0
                     "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
4055
0
                     "  </xsd:restriction>\n", tz);
4056
0
          else
4057
0
            appendStringInfo(&result,
4058
0
                     "  <xsd:restriction base=\"xsd:dateTime\">\n"
4059
0
                     "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
4060
0
                     "  </xsd:restriction>\n", typmod - VARHDRSZ, tz);
4061
0
          break;
4062
0
        }
4063
4064
0
      case DATEOID:
4065
0
        appendStringInfoString(&result,
4066
0
                     "  <xsd:restriction base=\"xsd:date\">\n"
4067
0
                     "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}\"/>\n"
4068
0
                     "  </xsd:restriction>\n");
4069
0
        break;
4070
4071
0
      default:
4072
0
        if (get_typtype(typeoid) == TYPTYPE_DOMAIN)
4073
0
        {
4074
0
          Oid     base_typeoid;
4075
0
          int32   base_typmod = -1;
4076
4077
0
          base_typeoid = getBaseTypeAndTypmod(typeoid, &base_typmod);
4078
4079
0
          appendStringInfo(&result,
4080
0
                   "  <xsd:restriction base=\"%s\"/>\n",
4081
0
                   map_sql_type_to_xml_name(base_typeoid, base_typmod));
4082
0
        }
4083
0
        break;
4084
0
    }
4085
0
    appendStringInfoString(&result, "</xsd:simpleType>\n");
4086
0
  }
4087
4088
0
  return result.data;
4089
0
}
4090
4091
4092
/*
4093
 * Map an SQL row to an XML element, taking the row from the active
4094
 * SPI cursor.  See also SQL/XML:2008 section 9.10.
4095
 */
4096
static void
4097
SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, char *tablename,
4098
              bool nulls, bool tableforest,
4099
              const char *targetns, bool top_level)
4100
0
{
4101
0
  int     i;
4102
0
  char     *xmltn;
4103
4104
0
  if (tablename)
4105
0
    xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
4106
0
  else
4107
0
  {
4108
0
    if (tableforest)
4109
0
      xmltn = "row";
4110
0
    else
4111
0
      xmltn = "table";
4112
0
  }
4113
4114
0
  if (tableforest)
4115
0
    xmldata_root_element_start(result, xmltn, NULL, targetns, top_level);
4116
0
  else
4117
0
    appendStringInfoString(result, "<row>\n");
4118
4119
0
  for (i = 1; i <= SPI_tuptable->tupdesc->natts; i++)
4120
0
  {
4121
0
    char     *colname;
4122
0
    Datum   colval;
4123
0
    bool    isnull;
4124
4125
0
    colname = map_sql_identifier_to_xml_name(SPI_fname(SPI_tuptable->tupdesc, i),
4126
0
                         true, false);
4127
0
    colval = SPI_getbinval(SPI_tuptable->vals[rownum],
4128
0
                 SPI_tuptable->tupdesc,
4129
0
                 i,
4130
0
                 &isnull);
4131
0
    if (isnull)
4132
0
    {
4133
0
      if (nulls)
4134
0
        appendStringInfo(result, "  <%s xsi:nil=\"true\"/>\n", colname);
4135
0
    }
4136
0
    else
4137
0
      appendStringInfo(result, "  <%s>%s</%s>\n",
4138
0
               colname,
4139
0
               map_sql_value_to_xml_value(colval,
4140
0
                            SPI_gettypeid(SPI_tuptable->tupdesc, i), true),
4141
0
               colname);
4142
0
  }
4143
4144
0
  if (tableforest)
4145
0
  {
4146
0
    xmldata_root_element_end(result, xmltn);
4147
0
    appendStringInfoChar(result, '\n');
4148
0
  }
4149
0
  else
4150
0
    appendStringInfoString(result, "</row>\n\n");
4151
0
}
4152
4153
4154
/*
4155
 * XPath related functions
4156
 */
4157
4158
#ifdef USE_LIBXML
4159
4160
/*
4161
 * Convert XML node to text.
4162
 *
4163
 * For attribute and text nodes, return the escaped text.  For anything else,
4164
 * dump the whole subtree.
4165
 */
4166
static text *
4167
xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
4168
{
4169
  xmltype    *result = NULL;
4170
4171
  if (cur->type != XML_ATTRIBUTE_NODE && cur->type != XML_TEXT_NODE)
4172
  {
4173
    void    (*volatile nodefree) (xmlNodePtr) = NULL;
4174
    volatile xmlBufferPtr buf = NULL;
4175
    volatile xmlNodePtr cur_copy = NULL;
4176
4177
    PG_TRY();
4178
    {
4179
      int     bytes;
4180
4181
      buf = xmlBufferCreate();
4182
      if (buf == NULL || xmlerrcxt->err_occurred)
4183
        xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4184
              "could not allocate xmlBuffer");
4185
4186
      /*
4187
       * Produce a dump of the node that we can serialize.  xmlNodeDump
4188
       * does that, but the result of that function won't contain
4189
       * namespace definitions from ancestor nodes, so we first do a
4190
       * xmlCopyNode() which duplicates the node along with its required
4191
       * namespace definitions.
4192
       *
4193
       * Some old libxml2 versions such as 2.7.6 produce partially
4194
       * broken XML_DOCUMENT_NODE nodes (unset content field) when
4195
       * copying them.  xmlNodeDump of such a node works fine, but
4196
       * xmlFreeNode crashes; set us up to call xmlFreeDoc instead.
4197
       */
4198
      cur_copy = xmlCopyNode(cur, 1);
4199
      if (cur_copy == NULL || xmlerrcxt->err_occurred)
4200
        xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4201
              "could not copy node");
4202
      nodefree = (cur_copy->type == XML_DOCUMENT_NODE) ?
4203
        (void (*) (xmlNodePtr)) xmlFreeDoc : xmlFreeNode;
4204
4205
      bytes = xmlNodeDump(buf, NULL, cur_copy, 0, 0);
4206
      if (bytes == -1 || xmlerrcxt->err_occurred)
4207
        xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4208
              "could not dump node");
4209
4210
      result = xmlBuffer_to_xmltype(buf);
4211
    }
4212
    PG_FINALLY();
4213
    {
4214
      if (nodefree)
4215
        nodefree(cur_copy);
4216
      if (buf)
4217
        xmlBufferFree(buf);
4218
    }
4219
    PG_END_TRY();
4220
  }
4221
  else
4222
  {
4223
    xmlChar    *str;
4224
4225
    str = xmlXPathCastNodeToString(cur);
4226
    PG_TRY();
4227
    {
4228
      /* Here we rely on XML having the same representation as TEXT */
4229
      char     *escaped = escape_xml((char *) str);
4230
4231
      result = (xmltype *) cstring_to_text(escaped);
4232
      pfree(escaped);
4233
    }
4234
    PG_FINALLY();
4235
    {
4236
      xmlFree(str);
4237
    }
4238
    PG_END_TRY();
4239
  }
4240
4241
  return result;
4242
}
4243
4244
/*
4245
 * Convert an XML XPath object (the result of evaluating an XPath expression)
4246
 * to an array of xml values, which are appended to astate.  The function
4247
 * result value is the number of elements in the array.
4248
 *
4249
 * If "astate" is NULL then we don't generate the array value, but we still
4250
 * return the number of elements it would have had.
4251
 *
4252
 * Nodesets are converted to an array containing the nodes' textual
4253
 * representations.  Primitive values (float, double, string) are converted
4254
 * to a single-element array containing the value's string representation.
4255
 */
4256
static int
4257
xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
4258
             ArrayBuildState *astate,
4259
             PgXmlErrorContext *xmlerrcxt)
4260
{
4261
  int     result = 0;
4262
  Datum   datum;
4263
  Oid     datumtype;
4264
  char     *result_str;
4265
4266
  switch (xpathobj->type)
4267
  {
4268
    case XPATH_NODESET:
4269
      if (xpathobj->nodesetval != NULL)
4270
      {
4271
        result = xpathobj->nodesetval->nodeNr;
4272
        if (astate != NULL)
4273
        {
4274
          int     i;
4275
4276
          for (i = 0; i < result; i++)
4277
          {
4278
            datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
4279
                                   xmlerrcxt));
4280
            (void) accumArrayResult(astate, datum, false,
4281
                        XMLOID, CurrentMemoryContext);
4282
          }
4283
        }
4284
      }
4285
      return result;
4286
4287
    case XPATH_BOOLEAN:
4288
      if (astate == NULL)
4289
        return 1;
4290
      datum = BoolGetDatum(xpathobj->boolval);
4291
      datumtype = BOOLOID;
4292
      break;
4293
4294
    case XPATH_NUMBER:
4295
      if (astate == NULL)
4296
        return 1;
4297
      datum = Float8GetDatum(xpathobj->floatval);
4298
      datumtype = FLOAT8OID;
4299
      break;
4300
4301
    case XPATH_STRING:
4302
      if (astate == NULL)
4303
        return 1;
4304
      datum = CStringGetDatum((char *) xpathobj->stringval);
4305
      datumtype = CSTRINGOID;
4306
      break;
4307
4308
    default:
4309
      elog(ERROR, "xpath expression result type %d is unsupported",
4310
         xpathobj->type);
4311
      return 0;     /* keep compiler quiet */
4312
  }
4313
4314
  /* Common code for scalar-value cases */
4315
  result_str = map_sql_value_to_xml_value(datum, datumtype, true);
4316
  datum = PointerGetDatum(cstring_to_xmltype(result_str));
4317
  (void) accumArrayResult(astate, datum, false,
4318
              XMLOID, CurrentMemoryContext);
4319
  return 1;
4320
}
4321
4322
4323
/*
4324
 * Common code for xpath() and xmlexists()
4325
 *
4326
 * Evaluate XPath expression and return number of nodes in res_nitems
4327
 * and array of XML values in astate.  Either of those pointers can be
4328
 * NULL if the corresponding result isn't wanted.
4329
 *
4330
 * It is up to the user to ensure that the XML passed is in fact
4331
 * an XML document - XPath doesn't work easily on fragments without
4332
 * a context node being known.
4333
 */
4334
static void
4335
xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
4336
         int *res_nitems, ArrayBuildState *astate)
4337
{
4338
  PgXmlErrorContext *xmlerrcxt;
4339
  volatile xmlParserCtxtPtr ctxt = NULL;
4340
  volatile xmlDocPtr doc = NULL;
4341
  volatile xmlXPathContextPtr xpathctx = NULL;
4342
  volatile xmlXPathCompExprPtr xpathcomp = NULL;
4343
  volatile xmlXPathObjectPtr xpathobj = NULL;
4344
  char     *datastr;
4345
  int32   len;
4346
  int32   xpath_len;
4347
  xmlChar    *string;
4348
  xmlChar    *xpath_expr;
4349
  size_t    xmldecl_len = 0;
4350
  int     i;
4351
  int     ndim;
4352
  Datum    *ns_names_uris;
4353
  bool     *ns_names_uris_nulls;
4354
  int     ns_count;
4355
4356
  /*
4357
   * Namespace mappings are passed as text[].  If an empty array is passed
4358
   * (ndim = 0, "0-dimensional"), then there are no namespace mappings.
4359
   * Else, a 2-dimensional array with length of the second axis being equal
4360
   * to 2 should be passed, i.e., every subarray contains 2 elements, the
4361
   * first element defining the name, the second one the URI.  Example:
4362
   * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
4363
   * 'http://example2.com']].
4364
   */
4365
  ndim = namespaces ? ARR_NDIM(namespaces) : 0;
4366
  if (ndim != 0)
4367
  {
4368
    int      *dims;
4369
4370
    dims = ARR_DIMS(namespaces);
4371
4372
    if (ndim != 2 || dims[1] != 2)
4373
      ereport(ERROR,
4374
          (errcode(ERRCODE_DATA_EXCEPTION),
4375
           errmsg("invalid array for XML namespace mapping"),
4376
           errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));
4377
4378
    Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
4379
4380
    deconstruct_array_builtin(namespaces, TEXTOID,
4381
                  &ns_names_uris, &ns_names_uris_nulls,
4382
                  &ns_count);
4383
4384
    Assert((ns_count % 2) == 0);  /* checked above */
4385
    ns_count /= 2;      /* count pairs only */
4386
  }
4387
  else
4388
  {
4389
    ns_names_uris = NULL;
4390
    ns_names_uris_nulls = NULL;
4391
    ns_count = 0;
4392
  }
4393
4394
  datastr = VARDATA(data);
4395
  len = VARSIZE(data) - VARHDRSZ;
4396
  xpath_len = VARSIZE_ANY_EXHDR(xpath_expr_text);
4397
  if (xpath_len == 0)
4398
    ereport(ERROR,
4399
        (errcode(ERRCODE_INVALID_ARGUMENT_FOR_XQUERY),
4400
         errmsg("empty XPath expression")));
4401
4402
  string = pg_xmlCharStrndup(datastr, len);
4403
  xpath_expr = pg_xmlCharStrndup(VARDATA_ANY(xpath_expr_text), xpath_len);
4404
4405
  /*
4406
   * In a UTF8 database, skip any xml declaration, which might assert
4407
   * another encoding.  Ignore parse_xml_decl() failure, letting
4408
   * xmlCtxtReadMemory() report parse errors.  Documentation disclaims
4409
   * xpath() support for non-ASCII data in non-UTF8 databases, so leave
4410
   * those scenarios bug-compatible with historical behavior.
4411
   */
4412
  if (GetDatabaseEncoding() == PG_UTF8)
4413
    parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL);
4414
4415
  xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4416
4417
  PG_TRY();
4418
  {
4419
    xmlInitParser();
4420
4421
    /*
4422
     * redundant XML parsing (two parsings for the same value during one
4423
     * command execution are possible)
4424
     */
4425
    ctxt = xmlNewParserCtxt();
4426
    if (ctxt == NULL || xmlerrcxt->err_occurred)
4427
      xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4428
            "could not allocate parser context");
4429
    doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
4430
                len - xmldecl_len, NULL, NULL, 0);
4431
    if (doc == NULL || xmlerrcxt->err_occurred)
4432
      xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4433
            "could not parse XML document");
4434
    xpathctx = xmlXPathNewContext(doc);
4435
    if (xpathctx == NULL || xmlerrcxt->err_occurred)
4436
      xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4437
            "could not allocate XPath context");
4438
    xpathctx->node = (xmlNodePtr) doc;
4439
4440
    /* register namespaces, if any */
4441
    if (ns_count > 0)
4442
    {
4443
      for (i = 0; i < ns_count; i++)
4444
      {
4445
        char     *ns_name;
4446
        char     *ns_uri;
4447
4448
        if (ns_names_uris_nulls[i * 2] ||
4449
          ns_names_uris_nulls[i * 2 + 1])
4450
          ereport(ERROR,
4451
              (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
4452
               errmsg("neither namespace name nor URI may be null")));
4453
        ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
4454
        ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
4455
        if (xmlXPathRegisterNs(xpathctx,
4456
                     (xmlChar *) ns_name,
4457
                     (xmlChar *) ns_uri) != 0)
4458
          ereport(ERROR,  /* is this an internal error??? */
4459
              (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
4460
                  ns_name, ns_uri)));
4461
      }
4462
    }
4463
4464
    /*
4465
     * Note: here and elsewhere, be careful to use xmlXPathCtxtCompile not
4466
     * xmlXPathCompile.  In libxml2 2.13.3 and older, the latter function
4467
     * fails to defend itself against recursion-to-stack-overflow.  See
4468
     * https://gitlab.gnome.org/GNOME/libxml2/-/issues/799
4469
     */
4470
    xpathcomp = xmlXPathCtxtCompile(xpathctx, xpath_expr);
4471
    if (xpathcomp == NULL || xmlerrcxt->err_occurred)
4472
      xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4473
            "invalid XPath expression");
4474
4475
    /*
4476
     * Version 2.6.27 introduces a function named
4477
     * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists,
4478
     * but we can derive the existence by whether any nodes are returned,
4479
     * thereby preventing a library version upgrade and keeping the code
4480
     * the same.
4481
     */
4482
    xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
4483
    if (xpathobj == NULL || xmlerrcxt->err_occurred)
4484
      xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4485
            "could not create XPath object");
4486
4487
    /*
4488
     * Extract the results as requested.
4489
     */
4490
    if (res_nitems != NULL)
4491
      *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4492
    else
4493
      (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4494
  }
4495
  PG_CATCH();
4496
  {
4497
    if (xpathobj)
4498
      xmlXPathFreeObject(xpathobj);
4499
    if (xpathcomp)
4500
      xmlXPathFreeCompExpr(xpathcomp);
4501
    if (xpathctx)
4502
      xmlXPathFreeContext(xpathctx);
4503
    if (doc)
4504
      xmlFreeDoc(doc);
4505
    if (ctxt)
4506
      xmlFreeParserCtxt(ctxt);
4507
4508
    pg_xml_done(xmlerrcxt, true);
4509
4510
    PG_RE_THROW();
4511
  }
4512
  PG_END_TRY();
4513
4514
  xmlXPathFreeObject(xpathobj);
4515
  xmlXPathFreeCompExpr(xpathcomp);
4516
  xmlXPathFreeContext(xpathctx);
4517
  xmlFreeDoc(doc);
4518
  xmlFreeParserCtxt(ctxt);
4519
4520
  pg_xml_done(xmlerrcxt, false);
4521
}
4522
#endif              /* USE_LIBXML */
4523
4524
/*
4525
 * Evaluate XPath expression and return array of XML values.
4526
 *
4527
 * As we have no support of XQuery sequences yet, this function seems
4528
 * to be the most useful one (array of XML functions plays a role of
4529
 * some kind of substitution for XQuery sequences).
4530
 */
4531
Datum
4532
xpath(PG_FUNCTION_ARGS)
4533
0
{
4534
#ifdef USE_LIBXML
4535
  text     *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4536
  xmltype    *data = PG_GETARG_XML_P(1);
4537
  ArrayType  *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4538
  ArrayBuildState *astate;
4539
4540
  astate = initArrayResult(XMLOID, CurrentMemoryContext, true);
4541
  xpath_internal(xpath_expr_text, data, namespaces,
4542
           NULL, astate);
4543
  PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
4544
#else
4545
0
  NO_XML_SUPPORT();
4546
0
  return 0;
4547
0
#endif
4548
0
}
4549
4550
/*
4551
 * Determines if the node specified by the supplied XPath exists
4552
 * in a given XML document, returning a boolean.
4553
 */
4554
Datum
4555
xmlexists(PG_FUNCTION_ARGS)
4556
0
{
4557
#ifdef USE_LIBXML
4558
  text     *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4559
  xmltype    *data = PG_GETARG_XML_P(1);
4560
  int     res_nitems;
4561
4562
  xpath_internal(xpath_expr_text, data, NULL,
4563
           &res_nitems, NULL);
4564
4565
  PG_RETURN_BOOL(res_nitems > 0);
4566
#else
4567
0
  NO_XML_SUPPORT();
4568
0
  return 0;
4569
0
#endif
4570
0
}
4571
4572
/*
4573
 * Determines if the node specified by the supplied XPath exists
4574
 * in a given XML document, returning a boolean. Differs from
4575
 * xmlexists as it supports namespaces and is not defined in SQL/XML.
4576
 */
4577
Datum
4578
xpath_exists(PG_FUNCTION_ARGS)
4579
0
{
4580
#ifdef USE_LIBXML
4581
  text     *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4582
  xmltype    *data = PG_GETARG_XML_P(1);
4583
  ArrayType  *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4584
  int     res_nitems;
4585
4586
  xpath_internal(xpath_expr_text, data, namespaces,
4587
           &res_nitems, NULL);
4588
4589
  PG_RETURN_BOOL(res_nitems > 0);
4590
#else
4591
0
  NO_XML_SUPPORT();
4592
0
  return 0;
4593
0
#endif
4594
0
}
4595
4596
/*
4597
 * Functions for checking well-formed-ness
4598
 */
4599
4600
#ifdef USE_LIBXML
4601
static bool
4602
wellformed_xml(text *data, XmlOptionType xmloption_arg)
4603
{
4604
  xmlDocPtr doc;
4605
  ErrorSaveContext escontext = {T_ErrorSaveContext};
4606
4607
  /*
4608
   * We'll report "true" if no soft error is reported by xml_parse().
4609
   */
4610
  doc = xml_parse(data, xmloption_arg, true,
4611
          GetDatabaseEncoding(), NULL, NULL, (Node *) &escontext);
4612
  if (doc)
4613
    xmlFreeDoc(doc);
4614
4615
  return !escontext.error_occurred;
4616
}
4617
#endif
4618
4619
Datum
4620
xml_is_well_formed(PG_FUNCTION_ARGS)
4621
0
{
4622
#ifdef USE_LIBXML
4623
  text     *data = PG_GETARG_TEXT_PP(0);
4624
4625
  PG_RETURN_BOOL(wellformed_xml(data, xmloption));
4626
#else
4627
0
  NO_XML_SUPPORT();
4628
0
  return 0;
4629
0
#endif              /* not USE_LIBXML */
4630
0
}
4631
4632
Datum
4633
xml_is_well_formed_document(PG_FUNCTION_ARGS)
4634
0
{
4635
#ifdef USE_LIBXML
4636
  text     *data = PG_GETARG_TEXT_PP(0);
4637
4638
  PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT));
4639
#else
4640
0
  NO_XML_SUPPORT();
4641
0
  return 0;
4642
0
#endif              /* not USE_LIBXML */
4643
0
}
4644
4645
Datum
4646
xml_is_well_formed_content(PG_FUNCTION_ARGS)
4647
0
{
4648
#ifdef USE_LIBXML
4649
  text     *data = PG_GETARG_TEXT_PP(0);
4650
4651
  PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT));
4652
#else
4653
0
  NO_XML_SUPPORT();
4654
0
  return 0;
4655
0
#endif              /* not USE_LIBXML */
4656
0
}
4657
4658
/*
4659
 * support functions for XMLTABLE
4660
 *
4661
 */
4662
#ifdef USE_LIBXML
4663
4664
/*
4665
 * Returns private data from executor state. Ensure validity by check with
4666
 * MAGIC number.
4667
 */
4668
static inline XmlTableBuilderData *
4669
GetXmlTableBuilderPrivateData(TableFuncScanState *state, const char *fname)
4670
{
4671
  XmlTableBuilderData *result;
4672
4673
  if (!IsA(state, TableFuncScanState))
4674
    elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4675
  result = (XmlTableBuilderData *) state->opaque;
4676
  if (result->magic != XMLTABLE_CONTEXT_MAGIC)
4677
    elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4678
4679
  return result;
4680
}
4681
#endif
4682
4683
/*
4684
 * XmlTableInitOpaque
4685
 *    Fill in TableFuncScanState->opaque for XmlTable processor; initialize
4686
 *    the XML parser.
4687
 *
4688
 * Note: Because we call pg_xml_init() here and pg_xml_done() in
4689
 * XmlTableDestroyOpaque, it is critical for robustness that no other
4690
 * executor nodes run until this node is processed to completion.  Caller
4691
 * must execute this to completion (probably filling a tuplestore to exhaust
4692
 * this node in a single pass) instead of using row-per-call mode.
4693
 */
4694
static void
4695
XmlTableInitOpaque(TableFuncScanState *state, int natts)
4696
0
{
4697
#ifdef USE_LIBXML
4698
  volatile xmlParserCtxtPtr ctxt = NULL;
4699
  XmlTableBuilderData *xtCxt;
4700
  PgXmlErrorContext *xmlerrcxt;
4701
4702
  xtCxt = palloc0(sizeof(XmlTableBuilderData));
4703
  xtCxt->magic = XMLTABLE_CONTEXT_MAGIC;
4704
  xtCxt->natts = natts;
4705
  xtCxt->xpathscomp = palloc0(sizeof(xmlXPathCompExprPtr) * natts);
4706
4707
  xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4708
4709
  PG_TRY();
4710
  {
4711
    xmlInitParser();
4712
4713
    ctxt = xmlNewParserCtxt();
4714
    if (ctxt == NULL || xmlerrcxt->err_occurred)
4715
      xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4716
            "could not allocate parser context");
4717
  }
4718
  PG_CATCH();
4719
  {
4720
    if (ctxt != NULL)
4721
      xmlFreeParserCtxt(ctxt);
4722
4723
    pg_xml_done(xmlerrcxt, true);
4724
4725
    PG_RE_THROW();
4726
  }
4727
  PG_END_TRY();
4728
4729
  xtCxt->xmlerrcxt = xmlerrcxt;
4730
  xtCxt->ctxt = ctxt;
4731
4732
  state->opaque = xtCxt;
4733
#else
4734
0
  NO_XML_SUPPORT();
4735
0
#endif              /* not USE_LIBXML */
4736
0
}
4737
4738
/*
4739
 * XmlTableSetDocument
4740
 *    Install the input document
4741
 */
4742
static void
4743
XmlTableSetDocument(TableFuncScanState *state, Datum value)
4744
0
{
4745
#ifdef USE_LIBXML
4746
  XmlTableBuilderData *xtCxt;
4747
  xmltype    *xmlval = DatumGetXmlP(value);
4748
  char     *str;
4749
  xmlChar    *xstr;
4750
  int     length;
4751
  volatile xmlDocPtr doc = NULL;
4752
  volatile xmlXPathContextPtr xpathcxt = NULL;
4753
4754
  xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetDocument");
4755
4756
  /*
4757
   * Use out function for casting to string (remove encoding property). See
4758
   * comment in xml_out.
4759
   */
4760
  str = xml_out_internal(xmlval, 0);
4761
4762
  length = strlen(str);
4763
  xstr = pg_xmlCharStrndup(str, length);
4764
4765
  PG_TRY();
4766
  {
4767
    doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0);
4768
    if (doc == NULL || xtCxt->xmlerrcxt->err_occurred)
4769
      xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4770
            "could not parse XML document");
4771
    xpathcxt = xmlXPathNewContext(doc);
4772
    if (xpathcxt == NULL || xtCxt->xmlerrcxt->err_occurred)
4773
      xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4774
            "could not allocate XPath context");
4775
    xpathcxt->node = (xmlNodePtr) doc;
4776
  }
4777
  PG_CATCH();
4778
  {
4779
    if (xpathcxt != NULL)
4780
      xmlXPathFreeContext(xpathcxt);
4781
    if (doc != NULL)
4782
      xmlFreeDoc(doc);
4783
4784
    PG_RE_THROW();
4785
  }
4786
  PG_END_TRY();
4787
4788
  xtCxt->doc = doc;
4789
  xtCxt->xpathcxt = xpathcxt;
4790
#else
4791
0
  NO_XML_SUPPORT();
4792
0
#endif              /* not USE_LIBXML */
4793
0
}
4794
4795
/*
4796
 * XmlTableSetNamespace
4797
 *    Add a namespace declaration
4798
 */
4799
static void
4800
XmlTableSetNamespace(TableFuncScanState *state, const char *name, const char *uri)
4801
0
{
4802
#ifdef USE_LIBXML
4803
  XmlTableBuilderData *xtCxt;
4804
4805
  if (name == NULL)
4806
    ereport(ERROR,
4807
        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4808
         errmsg("DEFAULT namespace is not supported")));
4809
  xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace");
4810
4811
  if (xmlXPathRegisterNs(xtCxt->xpathcxt,
4812
               pg_xmlCharStrndup(name, strlen(name)),
4813
               pg_xmlCharStrndup(uri, strlen(uri))))
4814
    xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4815
          "could not set XML namespace");
4816
#else
4817
0
  NO_XML_SUPPORT();
4818
0
#endif              /* not USE_LIBXML */
4819
0
}
4820
4821
/*
4822
 * XmlTableSetRowFilter
4823
 *    Install the row-filter Xpath expression.
4824
 */
4825
static void
4826
XmlTableSetRowFilter(TableFuncScanState *state, const char *path)
4827
0
{
4828
#ifdef USE_LIBXML
4829
  XmlTableBuilderData *xtCxt;
4830
  xmlChar    *xstr;
4831
4832
  xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetRowFilter");
4833
4834
  if (*path == '\0')
4835
    ereport(ERROR,
4836
        (errcode(ERRCODE_INVALID_ARGUMENT_FOR_XQUERY),
4837
         errmsg("row path filter must not be empty string")));
4838
4839
  xstr = pg_xmlCharStrndup(path, strlen(path));
4840
4841
  /* We require XmlTableSetDocument to have been done already */
4842
  Assert(xtCxt->xpathcxt != NULL);
4843
4844
  xtCxt->xpathcomp = xmlXPathCtxtCompile(xtCxt->xpathcxt, xstr);
4845
  if (xtCxt->xpathcomp == NULL || xtCxt->xmlerrcxt->err_occurred)
4846
    xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4847
          "invalid XPath expression");
4848
#else
4849
0
  NO_XML_SUPPORT();
4850
0
#endif              /* not USE_LIBXML */
4851
0
}
4852
4853
/*
4854
 * XmlTableSetColumnFilter
4855
 *    Install the column-filter Xpath expression, for the given column.
4856
 */
4857
static void
4858
XmlTableSetColumnFilter(TableFuncScanState *state, const char *path, int colnum)
4859
0
{
4860
#ifdef USE_LIBXML
4861
  XmlTableBuilderData *xtCxt;
4862
  xmlChar    *xstr;
4863
4864
  Assert(PointerIsValid(path));
4865
4866
  xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetColumnFilter");
4867
4868
  if (*path == '\0')
4869
    ereport(ERROR,
4870
        (errcode(ERRCODE_INVALID_ARGUMENT_FOR_XQUERY),
4871
         errmsg("column path filter must not be empty string")));
4872
4873
  xstr = pg_xmlCharStrndup(path, strlen(path));
4874
4875
  /* We require XmlTableSetDocument to have been done already */
4876
  Assert(xtCxt->xpathcxt != NULL);
4877
4878
  xtCxt->xpathscomp[colnum] = xmlXPathCtxtCompile(xtCxt->xpathcxt, xstr);
4879
  if (xtCxt->xpathscomp[colnum] == NULL || xtCxt->xmlerrcxt->err_occurred)
4880
    xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4881
          "invalid XPath expression");
4882
#else
4883
0
  NO_XML_SUPPORT();
4884
0
#endif              /* not USE_LIBXML */
4885
0
}
4886
4887
/*
4888
 * XmlTableFetchRow
4889
 *    Prepare the next "current" tuple for upcoming GetValue calls.
4890
 *    Returns false if the row-filter expression returned no more rows.
4891
 */
4892
static bool
4893
XmlTableFetchRow(TableFuncScanState *state)
4894
0
{
4895
#ifdef USE_LIBXML
4896
  XmlTableBuilderData *xtCxt;
4897
4898
  xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableFetchRow");
4899
4900
  /* Propagate our own error context to libxml2 */
4901
  xmlSetStructuredErrorFunc(xtCxt->xmlerrcxt, xml_errorHandler);
4902
4903
  if (xtCxt->xpathobj == NULL)
4904
  {
4905
    xtCxt->xpathobj = xmlXPathCompiledEval(xtCxt->xpathcomp, xtCxt->xpathcxt);
4906
    if (xtCxt->xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4907
      xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4908
            "could not create XPath object");
4909
4910
    xtCxt->row_count = 0;
4911
  }
4912
4913
  if (xtCxt->xpathobj->type == XPATH_NODESET)
4914
  {
4915
    if (xtCxt->xpathobj->nodesetval != NULL)
4916
    {
4917
      if (xtCxt->row_count++ < xtCxt->xpathobj->nodesetval->nodeNr)
4918
        return true;
4919
    }
4920
  }
4921
4922
  return false;
4923
#else
4924
0
  NO_XML_SUPPORT();
4925
0
  return false;
4926
0
#endif              /* not USE_LIBXML */
4927
0
}
4928
4929
/*
4930
 * XmlTableGetValue
4931
 *    Return the value for column number 'colnum' for the current row.  If
4932
 *    column -1 is requested, return representation of the whole row.
4933
 *
4934
 * This leaks memory, so be sure to reset often the context in which it's
4935
 * called.
4936
 */
4937
static Datum
4938
XmlTableGetValue(TableFuncScanState *state, int colnum,
4939
         Oid typid, int32 typmod, bool *isnull)
4940
0
{
4941
#ifdef USE_LIBXML
4942
  Datum   result = (Datum) 0;
4943
  XmlTableBuilderData *xtCxt;
4944
  volatile xmlXPathObjectPtr xpathobj = NULL;
4945
4946
  xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableGetValue");
4947
4948
  Assert(xtCxt->xpathobj &&
4949
       xtCxt->xpathobj->type == XPATH_NODESET &&
4950
       xtCxt->xpathobj->nodesetval != NULL);
4951
4952
  /* Propagate our own error context to libxml2 */
4953
  xmlSetStructuredErrorFunc(xtCxt->xmlerrcxt, xml_errorHandler);
4954
4955
  *isnull = false;
4956
4957
  Assert(xtCxt->xpathscomp[colnum] != NULL);
4958
4959
  PG_TRY();
4960
  {
4961
    xmlNodePtr  cur;
4962
    char     *cstr = NULL;
4963
4964
    /* Set current node as entry point for XPath evaluation */
4965
    cur = xtCxt->xpathobj->nodesetval->nodeTab[xtCxt->row_count - 1];
4966
    xtCxt->xpathcxt->node = cur;
4967
4968
    /* Evaluate column path */
4969
    xpathobj = xmlXPathCompiledEval(xtCxt->xpathscomp[colnum], xtCxt->xpathcxt);
4970
    if (xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4971
      xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4972
            "could not create XPath object");
4973
4974
    /*
4975
     * There are four possible cases, depending on the number of nodes
4976
     * returned by the XPath expression and the type of the target column:
4977
     * a) XPath returns no nodes.  b) The target type is XML (return all
4978
     * as XML).  For non-XML return types:  c) One node (return content).
4979
     * d) Multiple nodes (error).
4980
     */
4981
    if (xpathobj->type == XPATH_NODESET)
4982
    {
4983
      int     count = 0;
4984
4985
      if (xpathobj->nodesetval != NULL)
4986
        count = xpathobj->nodesetval->nodeNr;
4987
4988
      if (xpathobj->nodesetval == NULL || count == 0)
4989
      {
4990
        *isnull = true;
4991
      }
4992
      else
4993
      {
4994
        if (typid == XMLOID)
4995
        {
4996
          text     *textstr;
4997
          StringInfoData str;
4998
4999
          /* Concatenate serialized values */
5000
          initStringInfo(&str);
5001
          for (int i = 0; i < count; i++)
5002
          {
5003
            textstr =
5004
              xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
5005
                         xtCxt->xmlerrcxt);
5006
5007
            appendStringInfoText(&str, textstr);
5008
          }
5009
          cstr = str.data;
5010
        }
5011
        else
5012
        {
5013
          xmlChar    *str;
5014
5015
          if (count > 1)
5016
            ereport(ERROR,
5017
                (errcode(ERRCODE_CARDINALITY_VIOLATION),
5018
                 errmsg("more than one value returned by column XPath expression")));
5019
5020
          str = xmlXPathCastNodeSetToString(xpathobj->nodesetval);
5021
          cstr = str ? xml_pstrdup_and_free(str) : "";
5022
        }
5023
      }
5024
    }
5025
    else if (xpathobj->type == XPATH_STRING)
5026
    {
5027
      /* Content should be escaped when target will be XML */
5028
      if (typid == XMLOID)
5029
        cstr = escape_xml((char *) xpathobj->stringval);
5030
      else
5031
        cstr = (char *) xpathobj->stringval;
5032
    }
5033
    else if (xpathobj->type == XPATH_BOOLEAN)
5034
    {
5035
      char    typcategory;
5036
      bool    typispreferred;
5037
      xmlChar    *str;
5038
5039
      /* Allow implicit casting from boolean to numbers */
5040
      get_type_category_preferred(typid, &typcategory, &typispreferred);
5041
5042
      if (typcategory != TYPCATEGORY_NUMERIC)
5043
        str = xmlXPathCastBooleanToString(xpathobj->boolval);
5044
      else
5045
        str = xmlXPathCastNumberToString(xmlXPathCastBooleanToNumber(xpathobj->boolval));
5046
5047
      cstr = xml_pstrdup_and_free(str);
5048
    }
5049
    else if (xpathobj->type == XPATH_NUMBER)
5050
    {
5051
      xmlChar    *str;
5052
5053
      str = xmlXPathCastNumberToString(xpathobj->floatval);
5054
      cstr = xml_pstrdup_and_free(str);
5055
    }
5056
    else
5057
      elog(ERROR, "unexpected XPath object type %u", xpathobj->type);
5058
5059
    /*
5060
     * By here, either cstr contains the result value, or the isnull flag
5061
     * has been set.
5062
     */
5063
    Assert(cstr || *isnull);
5064
5065
    if (!*isnull)
5066
      result = InputFunctionCall(&state->in_functions[colnum],
5067
                     cstr,
5068
                     state->typioparams[colnum],
5069
                     typmod);
5070
  }
5071
  PG_FINALLY();
5072
  {
5073
    if (xpathobj != NULL)
5074
      xmlXPathFreeObject(xpathobj);
5075
  }
5076
  PG_END_TRY();
5077
5078
  return result;
5079
#else
5080
0
  NO_XML_SUPPORT();
5081
0
  return 0;
5082
0
#endif              /* not USE_LIBXML */
5083
0
}
5084
5085
/*
5086
 * XmlTableDestroyOpaque
5087
 *    Release all libxml2 resources
5088
 */
5089
static void
5090
XmlTableDestroyOpaque(TableFuncScanState *state)
5091
0
{
5092
#ifdef USE_LIBXML
5093
  XmlTableBuilderData *xtCxt;
5094
5095
  xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableDestroyOpaque");
5096
5097
  /* Propagate our own error context to libxml2 */
5098
  xmlSetStructuredErrorFunc(xtCxt->xmlerrcxt, xml_errorHandler);
5099
5100
  if (xtCxt->xpathscomp != NULL)
5101
  {
5102
    int     i;
5103
5104
    for (i = 0; i < xtCxt->natts; i++)
5105
      if (xtCxt->xpathscomp[i] != NULL)
5106
        xmlXPathFreeCompExpr(xtCxt->xpathscomp[i]);
5107
  }
5108
5109
  if (xtCxt->xpathobj != NULL)
5110
    xmlXPathFreeObject(xtCxt->xpathobj);
5111
  if (xtCxt->xpathcomp != NULL)
5112
    xmlXPathFreeCompExpr(xtCxt->xpathcomp);
5113
  if (xtCxt->xpathcxt != NULL)
5114
    xmlXPathFreeContext(xtCxt->xpathcxt);
5115
  if (xtCxt->doc != NULL)
5116
    xmlFreeDoc(xtCxt->doc);
5117
  if (xtCxt->ctxt != NULL)
5118
    xmlFreeParserCtxt(xtCxt->ctxt);
5119
5120
  pg_xml_done(xtCxt->xmlerrcxt, true);
5121
5122
  /* not valid anymore */
5123
  xtCxt->magic = 0;
5124
  state->opaque = NULL;
5125
5126
#else
5127
0
  NO_XML_SUPPORT();
5128
0
#endif              /* not USE_LIBXML */
5129
0
}