diff --git a/ChangeLog b/ChangeLog
index cbc24aaf..c286cf46 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+Wed Aug 22 18:27:47 CEST 2001 Daniel Veillard
+
+	* include/libxml/catalog.h catalog.c xmlIO.c HTMLparser.c:
+	  Added the part about section 7.2 on URI resolution,
+	  fixed a side effect in the HTML parser, look complete
+	  and ready to rock except the URI/SystemID part!
+
 Wed Aug 22 16:27:03 CEST 2001 Daniel Veillard
 
 	* include/libxml/catalog.h include/libxml/parser.h
diff --git a/HTMLparser.c b/HTMLparser.c
index 6824ddd1..d13f8c18 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -3739,6 +3739,7 @@ htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
     ctxt->validate = 0;
     ctxt->nbChars = 0;
     ctxt->checkIndex = 0;
+    ctxt->catalogs = NULL;
     xmlInitNodeInfoSeq(&ctxt->node_seq);
 }
 
diff --git a/catalog.c b/catalog.c
index 06b2b53b..fba077b4 100644
--- a/catalog.c
+++ b/catalog.c
@@ -319,7 +319,6 @@ xmlCatalogUnWrapURN(const xmlChar *urn) {
 
 static xmlCatalogEntryPtr
 xmlParseXMLCatalogFile(xmlCatalogPrefer prefer, const xmlChar *filename);
-
 static xmlCatalogEntryPtr
 xmlParseXMLCatalog(const xmlChar *value, xmlCatalogPrefer prefer,
 	           const char *file);
@@ -329,6 +328,9 @@ xmlParseXMLCatalogNodeList(xmlNodePtr cur, xmlCatalogPrefer prefer,
 static xmlChar *
 xmlCatalogListXMLResolve(xmlCatalogEntryPtr catal, const xmlChar *pubID,
 	              const xmlChar *sysID);
+static xmlChar *
+xmlCatalogListXMLResolveURI(xmlCatalogEntryPtr catal, const xmlChar *URI);
+
 
 static xmlCatalogEntryType
 xmlGetXMLCatalogEntryType(const xmlChar *name) {
@@ -1104,6 +1106,127 @@ xmlCatalogXMLResolve(xmlCatalogEntryPtr catal, const xmlChar *pubID,
     return(NULL);
 }
 
+/**
+ * xmlCatalogXMLResolveURI:
+ * @catal:  a catalog list
+ * @URI:  the URI
+ *
+ * Do a complete resolution lookup of an URI for a list of catalog
+ * entries.
+ *
+ * Implements (or tries to) 7.2.2. URI Resolution
+ * from http://www.oasis-open.org/committees/entity/spec-2001-08-06.html
+ *
+ * Returns the URI of the resource, XML_CATAL_BREAK if matching delegates
+ *      could not resolve it, or NULL if not found
+ */
+static xmlChar *
+xmlCatalogXMLResolveURI(xmlCatalogEntryPtr catal, const xmlChar *URI) {
+    xmlChar *ret = NULL;
+    xmlCatalogEntryPtr cur;
+    int haveDelegate = 0;
+    int haveNext = 0;
+    xmlCatalogEntryPtr rewrite = NULL;
+    int lenrewrite = 0, len;
+
+    if (catal == NULL)
+	return(NULL);
+
+    if (URI == NULL)
+	return(NULL);
+
+    /*
+     * First pass: exact match, longest rewrite prefix, count delegate/next.
+     */
+    cur = catal;
+    haveDelegate = 0;
+    while (cur != NULL) {
+	switch (cur->type) {
+	    case XML_CATA_URI:
+		if (xmlStrEqual(URI, cur->name)) {
+		    if (xmlDebugCatalogs)
+			xmlGenericError(xmlGenericErrorContext,
+				"Found URI match %s\n", cur->name);
+		    return(xmlStrdup(cur->value));
+		}
+		break;
+	    case XML_CATA_REWRITE_URI:
+		len = xmlStrlen(cur->name);
+		if ((len > lenrewrite) &&
+		    (!xmlStrncmp(URI, cur->name, len))) {
+		    lenrewrite = len;
+		    rewrite = cur;
+		}
+		break;
+	    case XML_CATA_DELEGATE_URI:
+		if (!xmlStrncmp(URI, cur->name, xmlStrlen(cur->name)))
+		    haveDelegate++;
+		break;
+	    case XML_CATA_NEXT_CATALOG:
+		haveNext++;
+		break;
+	    default:
+		break;
+	}
+	cur = cur->next;
+    }
+    if (rewrite != NULL) {
+	if (xmlDebugCatalogs)
+	    xmlGenericError(xmlGenericErrorContext,
+		    "Using rewriting rule %s\n", rewrite->name);
+	ret = xmlStrdup(rewrite->value);
+	if (ret != NULL)
+	    ret = xmlStrcat(ret, &URI[lenrewrite]);
+	return(ret);
+    }
+    if (haveDelegate) {
+	/*
+	 * Assume the entries have been sorted by decreasing substring
+	 * matches when the list was produced.
+	 */
+	cur = catal;
+	while (cur != NULL) {
+	    if ((cur->type == XML_CATA_DELEGATE_URI) &&
+		(!xmlStrncmp(URI, cur->name, xmlStrlen(cur->name)))) {
+		if (cur->children == NULL) {
+		    xmlFetchXMLCatalogFile(cur);
+		}
+		if (cur->children != NULL) {
+		    if (xmlDebugCatalogs)
+			xmlGenericError(xmlGenericErrorContext,
+				"Trying URI delegate %s\n", cur->value);
+		    ret = xmlCatalogListXMLResolveURI(cur->children, URI);
+		    if (ret != NULL)
+			return(ret);
+		}
+	    }
+	    cur = cur->next;
+	}
+	/*
+	 * Apply the cut algorithm explained in 4/
+	 */
+	return(XML_CATAL_BREAK);
+    }
+    if (haveNext) {
+	cur = catal;
+	while (cur != NULL) {
+	    if (cur->type == XML_CATA_NEXT_CATALOG) {
+		if (cur->children == NULL) {
+		    xmlFetchXMLCatalogFile(cur);
+		}
+		if (cur->children != NULL) {
+		    ret = xmlCatalogListXMLResolveURI(cur->children, URI);
+		    if (ret != NULL)
+			return(ret);
+		}
+	    }
+	    cur = cur->next;
+	}
+    }
+
+    return(NULL);
+}
+
 /**
  * xmlCatalogListXMLResolve:
  * @catal:  a catalog list
@@ -1181,6 +1304,59 @@ xmlCatalogListXMLResolve(xmlCatalogEntryPtr catal, const xmlChar *pubID,
     return(ret);
 }
 
+/**
+ * xmlCatalogListXMLResolveURI:
+ * @catal:  a catalog list
+ * @URI:  the URI
+ *
+ * Do a complete resolution lookup of an URI for a list of catalogs
+ *
+ * Implements (or tries to) 7.2. URI Resolution
+ * from http://www.oasis-open.org/committees/entity/spec-2001-08-06.html
+ *
+ * Returns the URI of the resource or NULL if not found
+ */
+static xmlChar *
+xmlCatalogListXMLResolveURI(xmlCatalogEntryPtr catal, const xmlChar *URI) {
+    xmlChar *ret = NULL;
+    xmlChar *urnID = NULL;
+
+    if (catal == NULL)
+	return(NULL);
+    if (URI == NULL)
+	return(NULL);
+
+    if (!xmlStrncmp(URI, BAD_CAST XML_URN_PUBID, sizeof(XML_URN_PUBID) - 1)) {
+	urnID = xmlCatalogUnWrapURN(URI);
+	if (xmlDebugCatalogs) {
+	    if (urnID == NULL)
+		xmlGenericError(xmlGenericErrorContext,
+			"URN ID %s expanded to NULL\n", URI);
+	    else
+		xmlGenericError(xmlGenericErrorContext,
+			"URN ID expanded to %s\n", urnID);
+	}
+	ret = xmlCatalogListXMLResolve(catal, urnID, NULL);
+	if (urnID != NULL)
+	    xmlFree(urnID);
+	return(ret);
+    }
+    while (catal != NULL) {
+	if (catal->type == XML_CATA_CATALOG) {
+	    if (catal->children == NULL) {
+		xmlFetchXMLCatalogFile(catal);
+	    }
+	    if (catal->children != NULL) {
+		ret = xmlCatalogXMLResolveURI(catal->children, URI);
+		if (ret != NULL)
+		    return(ret);
+	    }
+	}
+	catal = catal->next;
+    }
+    return(ret);
+}
+
 /************************************************************************
  *									*
  *			The SGML Catalog parser				*
@@ -1946,6 +2122,32 @@ xmlCatalogResolve(const xmlChar *pubID, const xmlChar *sysID) {
     return(NULL);
 }
 
+/**
+ * xmlCatalogResolveURI:
+ * @URI:  the URI
+ *
+ * Do a complete resolution lookup of an URI
+ *
+ * Returns the URI of the resource or NULL if not found, it must be freed
+ *      by the caller.
+ */
+xmlChar *
+xmlCatalogResolveURI(const xmlChar *URI) {
+    if (!xmlCatalogInitialized)
+	xmlInitializeCatalog();
+
+    if (xmlDefaultXMLCatalogList != NULL) {
+	return(xmlCatalogListXMLResolveURI(xmlDefaultXMLCatalogList, URI));
+    } else {
+	const xmlChar *ret;
+
+	ret = xmlCatalogSGMLResolve(NULL, URI);
+	if (ret != NULL)
+	    return(xmlStrdup(ret));
+    }
+    return(NULL);
+}
+
 /**
  * xmlCatalogDump:
  * @out:  the file.
@@ -2202,4 +2404,27 @@ xmlCatalogLocalResolve(void *catalogs, const xmlChar *pubID,
     return(xmlCatalogListXMLResolve(catal, pubID, sysID));
 }
 
+/**
+ * xmlCatalogLocalResolveURI:
+ * @catalogs:  a document's list of catalogs
+ * @URI:  the URI
+ *
+ * Do a complete resolution lookup of an URI using a
+ * document's private catalog list
+ *
+ * Returns the URI of the resource or NULL if not found, it must be freed
+ *      by the caller.
+ */
+xmlChar *
+xmlCatalogLocalResolveURI(void *catalogs, const xmlChar *URI) {
+    xmlCatalogEntryPtr catal;
+
+    if (!xmlCatalogInitialized)
+	xmlInitializeCatalog();
+    catal = (xmlCatalogEntryPtr) catalogs;
+    if (catal == NULL)
+	return(NULL);
+    return(xmlCatalogListXMLResolveURI(catal, URI));
+}
+
 #endif /* LIBXML_CATALOG_ENABLED */
diff --git a/include/libxml/catalog.h b/include/libxml/catalog.h
index d39fb60e..57afcaf2 100644
--- a/include/libxml/catalog.h
+++ b/include/libxml/catalog.h
@@ -63,6 +63,7 @@ xmlChar * xmlCatalogResolve (const xmlChar *pubID,
 	                                 const xmlChar *sysID);
 xmlChar *	xmlCatalogResolveSystem	(const xmlChar *sysID);
 xmlChar *	xmlCatalogResolvePublic	(const xmlChar *pubID);
+xmlChar *	xmlCatalogResolveURI	(const xmlChar *URI);
 int		xmlCatalogAdd		(const xmlChar *type,
 					 const xmlChar *orig,
 					 const xmlChar *replace);
@@ -78,6 +79,8 @@ void * xmlCatalogAddLocal (void *catalogs,
 xmlChar *	xmlCatalogLocalResolve	(void *catalogs,
 					 const xmlChar *pubID,
 	                                 const xmlChar *sysID);
+xmlChar *	xmlCatalogLocalResolveURI(void *catalogs,
+					 const xmlChar *URI);
 /*
  * Preference settings
  */
diff --git a/xmlIO.c b/xmlIO.c
index bff4479d..72a56bf7 100644
--- a/xmlIO.c
+++ b/xmlIO.c
@@ -2384,7 +2384,7 @@ xmlDefaultExternalEntityLoader(const char *URL, const char *ID,
 #ifdef LIBXML_CATALOG_ENABLED
     /*
      * If the resource doesn't exists as a file,
-     * try to load it from the resource pointed in the catalog
+     * try to load it from the resource pointed in the catalogs
      */
     pref = xmlCatalogGetDefaults();
 
@@ -2404,21 +2404,43 @@ xmlDefaultExternalEntityLoader(const char *URL, const char *ID,
 					      (const xmlChar *)URL);
 	}
 	/*
-	 * Do a global lookup
+	 * Try a global lookup
 	 */
-	if (((resource == NULL)
-#ifdef HAVE_STAT
-            || (stat((const char *) resource, &info) < 0)
-#endif
-	    ) && ((pref == XML_CATA_ALLOW_ALL) ||
-	     (pref == XML_CATA_ALLOW_GLOBAL))) {
-
+	if ((resource == NULL) &&
+	    ((pref == XML_CATA_ALLOW_ALL) ||
+	     (pref == XML_CATA_ALLOW_GLOBAL))) {
 	    resource = xmlCatalogResolve((const xmlChar *)ID,
 					 (const xmlChar *)URL);
 	}
+	if ((resource == NULL) && (URL != NULL))
+	    resource = xmlStrdup((const xmlChar *) URL);
+
 	/*
-	 * TODO: do an URI lookup on the reference
+	 * Do an URI lookup on the reference, document catalogs first
 	 */
+	if ((resource != NULL)
+#ifdef HAVE_STAT
+            && (stat((const char *) resource, &info) < 0)
+#endif
+	    ) {
+	    xmlChar *tmp = NULL;
+
+	    if ((ctxt->catalogs != NULL) &&
+		((pref == XML_CATA_ALLOW_ALL) ||
+		 (pref == XML_CATA_ALLOW_DOCUMENT))) {
+		tmp = xmlCatalogLocalResolveURI(ctxt->catalogs, resource);
+	    }
+	    if ((tmp == NULL) &&
+		((pref == XML_CATA_ALLOW_ALL) ||
+	         (pref == XML_CATA_ALLOW_GLOBAL))) {
+		tmp = xmlCatalogResolveURI(resource);
+	    }
+
+	    if (tmp != NULL) {
+		xmlFree(resource);
+		resource = tmp;
+	    }
+	}
     }
 #endif
 
@@ -2445,7 +2467,7 @@ xmlDefaultExternalEntityLoader(const char *URL, const char *ID,
 	    ctxt->sax->warning(ctxt,
 		    "failed to load external entity \"%s\"\n", resource);
     }
-    if (resource != (xmlChar *) URL)
+    if ((resource != NULL) && (resource != (xmlChar *) URL))
 	xmlFree(resource);
     return(ret);
 }