Skip to content

Commit 849c86e

Browse files
Robin Haberkorn authored and Commitfest Bot committed
contrib/xml2: overloaded xslt_process() to provide variants for xmltype and specifying parameters in arrays
* There are apparently no functions that accept XML as text, except for xmlparse(). xslt_process() should therefore also accept xmltype.
* A version accepting text is still kept for backwards compatibility, but is considered deprecated.
* The new xmltype-based version expects an array of stylesheet parameter-value pairs, which is less limited than the now-deprecated way of encoding all stylesheet parameters into a single text argument. It accepts an arbitrary number of parameters, and both key and value strings may contain `=` and `,` signs. hstore was not used because it lives in a separate module and we don't want to depend on any additional module.
* The new implementation respects the database's encoding: text strings are always converted to UTF8 before they are passed to libxml2.
* On the downside, xml_parse() had to be made an external function. Since a declaration cannot be added to xml.h without drawing in libxml2 headers, the declaration is repeated in xslt_proc.c. Perhaps xml_parse() should be declared in a separate internal header?
* xmlCtxtReadDoc() now sets a dummy "SQL" URL to preserve line numbers in XSLT stylesheet errors. This change at least does not break the test suite.
1 parent 280f87b commit 849c86e

File tree

6 files changed

+172
-46
lines changed

6 files changed

+172
-46
lines changed

contrib/xml2/expected/xml2.out

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,3 +278,16 @@ Variable 'n1' has not been declared.
278278
Undefined variable
279279
runtime error: file SQL line 3 element value-of
280280
XPath evaluation returned no result.
281+
-- xmltype and Array-based signature
282+
SELECT xslt_process(xmlelement(name xml),
283+
$$<stylesheet version="1.0" xmlns="http://www.w3.org/1999/XSL/Transform">
284+
<template match="/">
285+
<value-of select="$n1"/>
286+
</template>
287+
</stylesheet>$$::xml, ARRAY['n1','"foo"']);
288+
xslt_process
289+
--------------
290+
foo +
291+
292+
(1 row)
293+

contrib/xml2/sql/xml2.sql

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,3 +161,11 @@ $$<stylesheet version="1.0" xmlns="http://www.w3.org/1999/XSL/Transform">
161161
<value-of select="$n1"/>
162162
</template>
163163
</stylesheet>$$)::xml;
164+
165+
-- xmltype and Array-based signature
166+
SELECT xslt_process(xmlelement(name xml),
167+
$$<stylesheet version="1.0" xmlns="http://www.w3.org/1999/XSL/Transform">
168+
<template match="/">
169+
<value-of select="$n1"/>
170+
</template>
171+
</stylesheet>$$::xml, ARRAY['n1','"foo"']);

contrib/xml2/xml2--1.1.sql

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,3 +71,14 @@ CREATE FUNCTION xslt_process(text,text)
7171
RETURNS text
7272
AS 'MODULE_PATHNAME'
7373
LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
74+
75+
-- xmltype variant taking stylesheet parameters as an array of alternating
-- key and value strings.  Declared IMMUTABLE to match the two-argument xml
-- variant (which is bound to the same C function) and the text variants.
CREATE FUNCTION xslt_process(xml,xml,text[])
RETURNS xml
AS 'MODULE_PATHNAME','xslt_process_xmltype'
LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
79+
80+
-- the function checks for the correct argument count
81+
CREATE FUNCTION xslt_process(xml,xml)
82+
RETURNS xml
83+
AS 'MODULE_PATHNAME','xslt_process_xmltype'
84+
LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;

contrib/xml2/xslt_proc.c

Lines changed: 115 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
#include "fmgr.h"
1111
#include "utils/builtins.h"
1212
#include "utils/xml.h"
13+
#include "utils/array.h"
14+
#include "utils/memutils.h"
15+
#include "mb/pg_wchar.h"
1316

1417
#ifdef USE_LIBXSLT
1518

@@ -35,9 +38,18 @@
3538
extern PgXmlErrorContext *pgxml_parser_init(PgXmlStrictness strictness);
3639

3740
/* local defs */
41+
static xmltype *xslt_process_internal(xmltype *doct, xmltype *ssheet, const char **params);
3842
static const char **parse_params(text *paramstr);
3943
#endif /* USE_LIBXSLT */
4044

45+
/*
46+
* FIXME: This cannot easily be exposed in xml.h.
47+
* Perhaps there should be an xml-internal.h?
48+
*/
49+
xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
50+
bool preserve_whitespace, int encoding,
51+
XmlOptionType *parsed_xmloptiontype, xmlNodePtr *parsed_nodes,
52+
Node *escontext);
4153

4254
PG_FUNCTION_INFO_V1(xslt_process);
4355

@@ -48,9 +60,103 @@ xslt_process(PG_FUNCTION_ARGS)
4860

4961
text *doct = PG_GETARG_TEXT_PP(0);
5062
text *ssheet = PG_GETARG_TEXT_PP(1);
51-
text *volatile result = NULL;
52-
text *paramstr;
53-
const char **params;
63+
const char **params = NULL;
64+
text *result;
65+
66+
if (fcinfo->nargs == 3)
67+
{
68+
text *paramstr = PG_GETARG_TEXT_PP(2);
69+
70+
params = parse_params(paramstr);
71+
}
72+
73+
result = xslt_process_internal(doct, ssheet, params);
74+
75+
PG_RETURN_TEXT_P(result);
76+
77+
#else /* !USE_LIBXSLT */
78+
79+
ereport(ERROR,
80+
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
81+
errmsg("xslt_process() is not available without libxslt")));
82+
PG_RETURN_NULL();
83+
84+
#endif /* USE_LIBXSLT */
85+
}
86+
87+
PG_FUNCTION_INFO_V1(xslt_process_xmltype);
88+
89+
Datum
90+
xslt_process_xmltype(PG_FUNCTION_ARGS)
91+
{
92+
#ifdef USE_LIBXSLT
93+
94+
xmltype *doct = PG_GETARG_XML_P(0);
95+
xmltype *ssheet = PG_GETARG_XML_P(1);
96+
const char **params = NULL;
97+
xmltype *result;
98+
99+
/*
100+
* Parameters are key-value pairs. The values are XPath expressions, so
101+
* strings will have to be escaped with single or double quotes. Even
102+
* `xsltproc --stringparam` does nothing else than adding single or double
103+
* quotes and fails if the value contains both.
104+
*/
105+
if (fcinfo->nargs == 3)
106+
{
107+
ArrayType *paramarray = PG_GETARG_ARRAYTYPE_P(2);
108+
Datum *arr_datums;
109+
bool *arr_nulls;
110+
int arr_count;
111+
int i,
112+
j;
113+
114+
deconstruct_array_builtin(paramarray, TEXTOID, &arr_datums, &arr_nulls, &arr_count);
115+
116+
if ((arr_count % 2) != 0)
117+
ereport(ERROR,
118+
(errcode(ERRCODE_ARRAY_ELEMENT_ERROR),
119+
errmsg("number of stylesheet parameters (%d) must be a multiple of 2",
120+
arr_count)));
121+
122+
params = palloc_array(const char *, arr_count + 1);
123+
124+
for (i = 0, j = 0; i < arr_count; i++)
125+
{
126+
char *cstr;
127+
128+
if (arr_nulls[i])
129+
continue;
130+
131+
cstr = TextDatumGetCString(arr_datums[i]);
132+
params[j++] = (char *) pg_do_encoding_conversion((unsigned char *) cstr,
133+
strlen(cstr),
134+
GetDatabaseEncoding(),
135+
PG_UTF8);
136+
}
137+
params[j] = NULL;
138+
}
139+
140+
result = xslt_process_internal(doct, ssheet, params);
141+
142+
PG_RETURN_XML_P(result);
143+
144+
#else /* !USE_LIBXSLT */
145+
146+
ereport(ERROR,
147+
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
148+
errmsg("xslt_process() is not available without libxslt")));
149+
PG_RETURN_NULL();
150+
151+
#endif /* USE_LIBXSLT */
152+
}
153+
154+
#ifdef USE_LIBXSLT
155+
156+
static xmltype *
157+
xslt_process_internal(xmltype *doct, xmltype *ssheet, const char **params)
158+
{
159+
text *volatile result;
54160
PgXmlErrorContext *xmlerrcxt;
55161
volatile xsltStylesheetPtr stylesheet = NULL;
56162
volatile xmlDocPtr doctree = NULL;
@@ -64,18 +170,6 @@ xslt_process(PG_FUNCTION_ARGS)
64170
xmlGenericErrorFunc saved_errfunc;
65171
void *saved_errcxt;
66172

67-
if (fcinfo->nargs == 3)
68-
{
69-
paramstr = PG_GETARG_TEXT_PP(2);
70-
params = parse_params(paramstr);
71-
}
72-
else
73-
{
74-
/* No parameters */
75-
params = (const char **) palloc(sizeof(char *));
76-
params[0] = NULL;
77-
}
78-
79173
/* Setup parser */
80174
xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_ALL);
81175

@@ -93,21 +187,18 @@ xslt_process(PG_FUNCTION_ARGS)
93187
int reslen = 0;
94188

95189
/*
96-
* Parse document. It's important to set an "URL", so libxslt includes
97-
* line numbers in error messages (cf. xsltPrintErrorContext()).
190+
* Parse document.
98191
*/
99-
doctree = xmlReadMemory((char *) VARDATA_ANY(doct),
100-
VARSIZE_ANY_EXHDR(doct), "SQL", NULL,
101-
XML_PARSE_NOENT);
192+
doctree = xml_parse(doct, XMLOPTION_DOCUMENT, true,
193+
GetDatabaseEncoding(), NULL, NULL, NULL);
102194

103195
if (doctree == NULL || pg_xml_error_occurred(xmlerrcxt))
104196
xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
105197
"error parsing XML document");
106198

107199
/* Same for stylesheet */
108-
ssdoc = xmlReadMemory((char *) VARDATA_ANY(ssheet),
109-
VARSIZE_ANY_EXHDR(ssheet), "SQL", NULL,
110-
XML_PARSE_NOENT);
200+
ssdoc = xml_parse(ssheet, XMLOPTION_DOCUMENT, true,
201+
GetDatabaseEncoding(), NULL, NULL, NULL);
111202

112203
if (ssdoc == NULL || pg_xml_error_occurred(xmlerrcxt))
113204
xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
@@ -198,18 +289,9 @@ xslt_process(PG_FUNCTION_ARGS)
198289
xsltSetGenericErrorFunc(saved_errcxt, saved_errfunc);
199290
pg_xml_done(xmlerrcxt, false);
200291

201-
PG_RETURN_TEXT_P(result);
202-
#else /* !USE_LIBXSLT */
203-
204-
ereport(ERROR,
205-
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
206-
errmsg("xslt_process() is not available without libxslt")));
207-
PG_RETURN_NULL();
208-
#endif /* USE_LIBXSLT */
292+
return result;
209293
}
210294

211-
#ifdef USE_LIBXSLT
212-
213295
static const char **
214296
parse_params(text *paramstr)
215297
{

doc/src/sgml/xml2.sgml

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -408,22 +408,29 @@ ORDER BY doc_num, line_num;
408408
</indexterm>
409409

410410
<synopsis>
411-
xslt_process(text document, text stylesheet, text paramlist) returns text
411+
xslt_process(xml document, xml stylesheet, text[] paramlist) returns xml
412412
</synopsis>
413413

414414
<para>
415415
This function applies the XSL stylesheet to the document and returns
416-
the transformed result. The <literal>paramlist</literal> is a list of parameter
417-
assignments to be used in the transformation, specified in the form
418-
<literal>a=1,b=2</literal>. Note that the
419-
parameter parsing is very simple-minded: parameter values cannot
420-
contain commas!
416+
the transformed result. The <literal>paramlist</literal> is an array of parameter
417+
assignments to be used in the transformation, specified in pairs of
418+
key and value strings (e.g. <literal>ARRAY['a','1', 'b','2']</literal>).
419+
The length of the array must be even.
420+
Note that the values are still interpreted as XPath expressions, so string values need to
421+
be quoted in single or double quotes (e.g. <literal>ARRAY['a','"string"']</literal>).
421422
</para>
422423

423424
<para>
424425
There is also a two-parameter version of <function>xslt_process</function> which
425426
does not pass any parameters to the transformation.
426427
</para>
428+
429+
<para>
430+
<emphasis>Deprecated</emphasis> variants of <function>xslt_process</function> accepting
431+
text arguments and parameters encoded into single text strings
432+
(e.g. <literal>a=1,b=2</literal>) are also still available.
433+
</para>
427434
</sect3>
428435
</sect2>
429436

src/backend/utils/adt/xml.c

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -154,11 +154,11 @@ static int parse_xml_decl(const xmlChar *str, size_t *lenp,
154154
static bool print_xml_decl(StringInfo buf, const xmlChar *version,
155155
pg_enc encoding, int standalone);
156156
static bool xml_doctype_in_content(const xmlChar *str);
157-
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
158-
bool preserve_whitespace, int encoding,
159-
XmlOptionType *parsed_xmloptiontype,
160-
xmlNodePtr *parsed_nodes,
161-
Node *escontext);
157+
xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
158+
bool preserve_whitespace, int encoding,
159+
XmlOptionType *parsed_xmloptiontype,
160+
xmlNodePtr *parsed_nodes,
161+
Node *escontext);
162162
static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
163163
static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
164164
ArrayBuildState *astate,
@@ -1782,7 +1782,7 @@ xml_doctype_in_content(const xmlChar *str)
17821782
* TODO maybe libxml2's xmlreader is better? (do not construct DOM,
17831783
* yet do not use SAX - see xmlreader.c)
17841784
*/
1785-
static xmlDocPtr
1785+
xmlDocPtr
17861786
xml_parse(text *data, XmlOptionType xmloption_arg,
17871787
bool preserve_whitespace, int encoding,
17881788
XmlOptionType *parsed_xmloptiontype, xmlNodePtr *parsed_nodes,
@@ -1881,8 +1881,13 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
18811881
options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
18821882
| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
18831883

1884+
/*
1885+
* Setting a dummy "SQL" URL is important for the
1886+
* xsltPrintErrorContext() when using the legacy text-based
1887+
* xslt_process() variant.
1888+
*/
18841889
doc = xmlCtxtReadDoc(ctxt, utf8string,
1885-
NULL, /* no URL */
1890+
"SQL",
18861891
"UTF-8",
18871892
options);
18881893

0 commit comments

Comments
 (0)