diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index a9150e85692..a358de68728 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -48,6 +48,7 @@ #ifdef USE_LIBXML #include #include +#include #include #include #include @@ -99,8 +100,12 @@ struct PgXmlErrorContext /* previous libxml error handling state (saved by pg_xml_init) */ xmlStructuredErrorFunc saved_errfunc; void *saved_errcxt; + /* previous libxml entity handler (saved by pg_xml_init) */ + xmlExternalEntityLoader saved_entityfunc; }; +static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID, + xmlParserCtxtPtr ctxt); static void xml_errorHandler(void *data, xmlErrorPtr error); static void xml_ereport_by_code(int level, int sqlcode, const char *msg, int errcode); @@ -985,6 +990,13 @@ pg_xml_init(PgXmlStrictness strictness) " being used is not compatible with the libxml2" " header files that PostgreSQL was built with."))); + /* + * Also, install an entity loader to prevent unwanted fetches of external + * files and URLs. + */ + errcxt->saved_entityfunc = xmlGetExternalEntityLoader(); + xmlSetExternalEntityLoader(xmlPgEntityLoader); + return errcxt; } @@ -1027,8 +1039,9 @@ pg_xml_done(PgXmlErrorContext *errcxt, bool isError) if (cur_errcxt != (void *) errcxt) elog(WARNING, "libxml error handling state is out of sync with xml.c"); - /* Restore the saved handler */ + /* Restore the saved handlers */ xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc); + xmlSetExternalEntityLoader(errcxt->saved_entityfunc); /* * Mark the struct as invalid, just in case somebody somehow manages to @@ -1472,6 +1485,25 @@ xml_pstrdup(const char *string) #endif /* USE_LIBXMLCONTEXT */ +/* + * xmlPgEntityLoader --- entity loader callback function + * + * Silently prevent any external entity URL from being loaded. We don't want + * to throw an error, so instead make the entity appear to expand to an empty + * string. + * + * We would prefer to allow loading entities that exist in the system's + * global XML catalog; but the available libxml2 APIs make that a complex + * and fragile task. For now, just shut down all external access. + */ +static xmlParserInputPtr +xmlPgEntityLoader(const char *URL, const char *ID, + xmlParserCtxtPtr ctxt) +{ + return xmlNewStringInputStream(ctxt, (const xmlChar *) ""); +} + + /* * xml_ereport --- report an XML-related error * @@ -1566,7 +1598,14 @@ xml_errorHandler(void *data, xmlErrorPtr error) case XML_FROM_NONE: case XML_FROM_MEMORY: case XML_FROM_IO: - /* Accept error regardless of the parsing purpose */ + /* + * Suppress warnings about undeclared entities. We need to do + * this to avoid problems due to not loading DTD definitions. + */ + if (error->code == XML_WAR_UNDECLARED_ENTITY) + return; + + /* Otherwise, accept error regardless of the parsing purpose */ break; default: diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out index 5dfa44b5b99..4f1e3972dcb 100644 --- a/src/test/regress/expected/xml.out +++ b/src/test/regress/expected/xml.out @@ -896,3 +896,23 @@ CONTEXT: SQL function "xpath" statement 1 {""} (1 row) +-- External entity references should not leak filesystem information. +SELECT XMLPARSE(DOCUMENT ']>&c;'); + xmlparse +----------------------------------------------------------------- + ]>&c; +(1 row) + +SELECT XMLPARSE(DOCUMENT ']>&c;'); + xmlparse +----------------------------------------------------------------------- + ]>&c; +(1 row) + +-- This might or might not load the requested DTD, but it mustn't throw error. +SELECT XMLPARSE(DOCUMENT ' '); + xmlparse +------------------------------------------------------------------------------------------------------------------------------------------------------ +   +(1 row) + diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out index c6c0e7ac887..044c5529ef2 100644 --- a/src/test/regress/expected/xml_1.out +++ b/src/test/regress/expected/xml_1.out @@ -789,3 +789,17 @@ LINE 1: SELECT xpath('/*', ''); ^ DETAIL: This functionality requires the server to be built with libxml support. HINT: You need to rebuild PostgreSQL using --with-libxml. +-- External entity references should not leak filesystem information. +SELECT XMLPARSE(DOCUMENT ']>&c;'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT XMLPARSE(DOCUMENT ']>&c;'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +-- This might or might not load the requested DTD, but it mustn't throw error. +SELECT XMLPARSE(DOCUMENT ' '); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql index 3623dbc254e..90d4d67f04e 100644 --- a/src/test/regress/sql/xml.sql +++ b/src/test/regress/sql/xml.sql @@ -259,3 +259,9 @@ SELECT xpath('/*', ''); -- XPath deprecates relative namespaces, but they're not supposed to -- throw an error, only a warning. SELECT xpath('/*', ''); + +-- External entity references should not leak filesystem information. +SELECT XMLPARSE(DOCUMENT ']>&c;'); +SELECT XMLPARSE(DOCUMENT ']>&c;'); +-- This might or might not load the requested DTD, but it mustn't throw error. +SELECT XMLPARSE(DOCUMENT ' ');