Make our back branches compatible with libxml2 2.13.x.

This back-patches HEAD commits 066e8ac6e, 6082b3d5d, e7192486d,
and 896cd266f into supported branches.  Changes:

* Use xmlAddChildList not xmlAddChild in XMLSERIALIZE
(affects v16 and up only).  This was a flat-out coding mistake
that we got away with due to lax checking in previous versions
of xmlAddChild.

* Use xmlParseInNodeContext not xmlParseBalancedChunkMemory.
This is to dodge a bug in xmlParseBalancedChunkMemory in libxml2
releases 2.13.0-2.13.2.  While that bug is now fixed upstream and
will probably never be seen in any production-oriented distro, it is
currently a problem on some more-bleeding-edge-friendly platforms.

* Suppress "chunk is not well balanced" errors from libxml2,
unless it is the only error.  This eliminates an error-reporting
discrepancy between 2.13 and older releases.  This error is
almost always redundant with previous errors, if not flat-out
inappropriate, which is why 2.13 changed the behavior and why
nobody's likely to miss it.

Erik Wienhold and Tom Lane, per report from Frank Streitzig.

Discussion: https://postgr.es/m/trinity-b0161630-d230-4598-9ebc-7a23acdb37cb-1720186432160@3c-app-gmx-bap25
Discussion: https://postgr.es/m/trinity-361ba18b-541a-4fe7-bc63-655ae3a7d599-1720259822452@3c-app-gmx-bs01
This commit is contained in:
Tom Lane 2024-07-10 20:15:52 -04:00
parent c774d31f84
commit 48132587d9
3 changed files with 58 additions and 31 deletions

View File

@ -1545,6 +1545,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
PG_TRY();
{
bool parse_as_document = false;
int options;
int res_code;
size_t count = 0;
xmlChar *version = NULL;
@ -1552,11 +1553,6 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
xmlInitParser();
ctxt = xmlNewParserCtxt();
if (ctxt == NULL || xmlerrcxt->err_occurred)
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
"could not allocate parser context");
/* Decide whether to parse as document or content */
if (xmloption_arg == XMLOPTION_DOCUMENT)
parse_as_document = true;
@ -1575,20 +1571,30 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
parse_as_document = true;
}
/*
* Select parse options.
*
* Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
* according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined by
* internal DTD are applied'. As for external DTDs, we try to support
* them too (see SQL/XML:2008 GR 10.16.7.e), but that doesn't really
* happen because xmlPgEntityLoader prevents it.
*/
options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
if (parse_as_document)
{
/*
* Note, that here we try to apply DTD defaults
* (XML_PARSE_DTDATTR) according to SQL/XML:2008 GR 10.16.7.d:
* 'Default values defined by internal DTD are applied'. As for
* external DTDs, we try to support them too, (see SQL/XML:2008 GR
* 10.16.7.e)
*/
ctxt = xmlNewParserCtxt();
if (ctxt == NULL || xmlerrcxt->err_occurred)
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
"could not allocate parser context");
doc = xmlCtxtReadDoc(ctxt, utf8string,
NULL,
NULL, /* no URL */
"UTF-8",
XML_PARSE_NOENT | XML_PARSE_DTDATTR
| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
options);
if (doc == NULL || xmlerrcxt->err_occurred)
{
/* Use original option to decide which error code to throw */
@ -1602,17 +1608,36 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
}
else
{
xmlNodePtr root;
/* set up document with empty root node to be the context node */
doc = xmlNewDoc(version);
Assert(doc->encoding == NULL);
doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
doc->standalone = standalone;
root = xmlNewNode(NULL, (const xmlChar *) "content-root");
if (root == NULL || xmlerrcxt->err_occurred)
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
"could not allocate xml node");
/* This attaches root to doc, so we need not free it separately. */
xmlDocSetRootElement(doc, root);
/* allow empty content */
if (*(utf8string + count))
{
res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
utf8string + count, NULL);
if (res_code != 0 || xmlerrcxt->err_occurred)
xmlNodePtr node_list = NULL;
xmlParserErrors res;
res = xmlParseInNodeContext(root,
(char *) utf8string + count,
strlen((char *) utf8string + count),
options,
&node_list);
xmlFreeNodeList(node_list);
if (res != XML_ERR_OK || xmlerrcxt->err_occurred)
xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
"invalid XML content");
}
@ -1631,7 +1656,8 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
}
PG_END_TRY();
xmlFreeParserCtxt(ctxt);
if (ctxt != NULL)
xmlFreeParserCtxt(ctxt);
pg_xml_done(xmlerrcxt, false);
@ -1812,6 +1838,19 @@ xml_errorHandler(void *data, PgXmlErrorPtr error)
switch (domain)
{
case XML_FROM_PARSER:
/*
* XML_ERR_NOT_WELL_BALANCED is typically reported after some
* other, more on-point error. Furthermore, libxml2 2.13 reports
* it under a completely different set of rules than prior
* versions. To avoid cross-version behavioral differences,
* suppress it so long as we already logged some error.
*/
if (error->code == XML_ERR_NOT_WELL_BALANCED &&
xmlerrcxt->err_occurred)
return;
/* fall through */
case XML_FROM_NONE:
case XML_FROM_MEMORY:
case XML_FROM_IO:

View File

@ -223,17 +223,11 @@ ERROR: invalid XML content
DETAIL: line 1: xmlParseEntityRef: no name
<invalidentity>&</invalidentity>
^
line 1: chunk is not well balanced
<invalidentity>&</invalidentity>
^
SELECT xmlparse(content '<undefinedentity>&idontexist;</undefinedentity>');
ERROR: invalid XML content
DETAIL: line 1: Entity 'idontexist' not defined
<undefinedentity>&idontexist;</undefinedentity>
^
line 1: chunk is not well balanced
<undefinedentity>&idontexist;</undefinedentity>
^
SELECT xmlparse(content '<invalidns xmlns=''&lt;''/>');
xmlparse
---------------------------
@ -252,9 +246,6 @@ DETAIL: line 1: Entity 'idontexist' not defined
<twoerrors>&idontexist;</unbalanced>
^
line 1: Opening and ending tag mismatch: twoerrors line 1 and unbalanced
<twoerrors>&idontexist;</unbalanced>
^
line 1: chunk is not well balanced
<twoerrors>&idontexist;</unbalanced>
^
SELECT xmlparse(content '<nosuchprefix:tag/>');

View File

@ -219,13 +219,11 @@ ERROR: invalid XML content
DETAIL: line 1: xmlParseEntityRef: no name
<invalidentity>&</invalidentity>
^
line 1: chunk is not well balanced
SELECT xmlparse(content '<undefinedentity>&idontexist;</undefinedentity>');
ERROR: invalid XML content
DETAIL: line 1: Entity 'idontexist' not defined
<undefinedentity>&idontexist;</undefinedentity>
^
line 1: chunk is not well balanced
SELECT xmlparse(content '<invalidns xmlns=''&lt;''/>');
xmlparse
---------------------------
@ -244,7 +242,6 @@ DETAIL: line 1: Entity 'idontexist' not defined
<twoerrors>&idontexist;</unbalanced>
^
line 1: Opening and ending tag mismatch: twoerrors line 1 and unbalanced
line 1: chunk is not well balanced
SELECT xmlparse(content '<nosuchprefix:tag/>');
xmlparse
---------------------