diff --git a/ChangeLog b/ChangeLog index 4caf31ec..a2233424 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +Fri Sep 26 14:41:53 CEST 2003 Daniel Veillard + + * HTMLparser.c testHTML.c xmllint.c include/libxml/HTMLparser.h: + added the same htmlRead APIs than their XML counterparts + * include/libxml/parser.h: new parser options, not yet implemented, + added an options field to the context. + * tree.c: patch from Shaun McCance to fix bug #123238 when ]]> + is found within a cdata section. + * result/noent/cdata2 result/cdata2 result/cdata2.rdr + result/cdata2.sax test/cdata2: add one more cdata test + Thu Sep 25 23:03:23 CEST 2003 Daniel Veillard * parser.c xmllint.c doc/libxml2-api.xml include/libxml/parser.h: diff --git a/HTMLparser.c b/HTMLparser.c index d2cba584..caed896b 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -5541,4 +5541,536 @@ htmlNodeStatus(const htmlNodePtr node, int legacy) { default: return HTML_NA ; } } +/************************************************************************ + * * + * New set (2.6.0) of simpler and more flexible APIs * + * * + ************************************************************************/ +/** + * DICT_FREE: + * @str: a string + * + * Free a string if it is not owned by the "dict" dictionary in the + * current scope + */ +#define DICT_FREE(str) \ + if ((str) && ((!dict) || \ + (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ + xmlFree((char *)(str)); + +/** + * htmlCtxtReset: + * @ctxt: an HTML parser context + * + * Reset a parser context so that it can be reused for another parse: + * every pushed input is popped and freed, the strings and the document + * held by the context are released and the parsing state goes back to + * its initial values. Does nothing if @ctxt is NULL. + */ +void +htmlCtxtReset(htmlParserCtxtPtr ctxt) +{ + xmlParserInputPtr input; + xmlDictPtr dict; + + if (ctxt == NULL) + return; + dict = ctxt->dict; /* DICT_FREE() below expects a local named "dict" */ + + while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ + xmlFreeInputStream(input); + } + ctxt->inputNr = 0; + ctxt->input = NULL; + + ctxt->spaceNr = 0; + ctxt->spaceTab[0] = -1; + ctxt->space = &ctxt->spaceTab[0]; + + + ctxt->nodeNr = 0; + ctxt->node = NULL; + + ctxt->nameNr = 0; + ctxt->name = NULL; + + DICT_FREE(ctxt->version); + 
ctxt->version = NULL; + DICT_FREE(ctxt->encoding); + ctxt->encoding = NULL; + DICT_FREE(ctxt->directory); + ctxt->directory = NULL; + DICT_FREE(ctxt->extSubURI); + ctxt->extSubURI = NULL; + DICT_FREE(ctxt->extSubSystem); + ctxt->extSubSystem = NULL; + if (ctxt->myDoc != NULL) + xmlFreeDoc(ctxt->myDoc); /* drop a tree still attached to the context */ + ctxt->myDoc = NULL; + + ctxt->standalone = -1; + ctxt->hasExternalSubset = 0; + ctxt->hasPErefs = 0; + ctxt->html = 1; + ctxt->external = 0; + ctxt->instate = XML_PARSER_START; + ctxt->token = 0; + + ctxt->wellFormed = 1; + ctxt->nsWellFormed = 1; + ctxt->valid = 1; + ctxt->vctxt.userData = ctxt; + ctxt->vctxt.error = xmlParserValidityError; + ctxt->vctxt.warning = xmlParserValidityWarning; + ctxt->record_info = 0; + ctxt->nbChars = 0; + ctxt->checkIndex = 0; + ctxt->inSubset = 0; + ctxt->errNo = XML_ERR_OK; + ctxt->depth = 0; + ctxt->charset = XML_CHAR_ENCODING_UTF8; + ctxt->catalogs = NULL; + xmlInitNodeInfoSeq(&ctxt->node_seq); + + if (ctxt->attsDefault != NULL) { + xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); + ctxt->attsDefault = NULL; + } + if (ctxt->attsSpecial != NULL) { + xmlHashFree(ctxt->attsSpecial, NULL); + ctxt->attsSpecial = NULL; + } +} + +/** + * htmlCtxtUseOptions: + * @ctxt: an HTML parser context + * @options: a combination of htmlParserOption(s) + * + * Applies the options to the parser context + * + * Returns 0 in case of success, the set of unknown or unimplemented options + * in case of error.
+ */ +int +htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options) +{ + if (options & HTML_PARSE_NOWARNING) { + ctxt->sax->warning = NULL; + options -= XML_PARSE_NOWARNING; + } + if (options & HTML_PARSE_NOERROR) { + ctxt->sax->error = NULL; + ctxt->sax->fatalError = NULL; + options -= XML_PARSE_NOERROR; + } + if (options & HTML_PARSE_PEDANTIC) { + ctxt->pedantic = 1; + options -= XML_PARSE_PEDANTIC; + } else + ctxt->pedantic = 0; + if (options & XML_PARSE_NOBLANKS) { + ctxt->keepBlanks = 0; + ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; + options -= XML_PARSE_NOBLANKS; + } else + ctxt->keepBlanks = 1; + ctxt->dictNames = 0; + return (options); +} + +/** + * htmlDoRead: + * @ctxt: an HTML parser context + * @URL: the base URL to use for the document + * @encoding: the document encoding, or NULL + * @options: a combination of htmlParserOption(s) + * @reuse: keep the context for reuse + * + * Common front-end for the htmlRead functions + * + * Returns the resulting document tree or NULL + */ +static htmlDocPtr +htmlDoRead(htmlParserCtxtPtr ctxt, const char *URL, const char *encoding, + int options, int reuse) +{ + htmlDocPtr ret; + + htmlCtxtUseOptions(ctxt, options); + ctxt->html = 1; + if (encoding != NULL) { + xmlCharEncodingHandlerPtr hdlr; + + hdlr = xmlFindCharEncodingHandler(encoding); + if (hdlr != NULL) + xmlSwitchToEncoding(ctxt, hdlr); + } + if ((URL != NULL) && (ctxt->input != NULL) && + (ctxt->input->filename == NULL)) + ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); + htmlParseDocument(ctxt); + ret = ctxt->myDoc; + ctxt->myDoc = NULL; + if (!reuse) { + if ((ctxt->dictNames) && + (ret != NULL) && + (ret->dict == ctxt->dict)) + ctxt->dict = NULL; + xmlFreeParserCtxt(ctxt); + } else { + /* Must duplicate the reference to the dictionary */ + if ((ctxt->dictNames) && + (ret != NULL) && + (ret->dict == ctxt->dict)) + xmlDictReference(ctxt->dict); + } + return (ret); +} + +/** + * htmlReadDoc: + * @cur: a pointer to a zero 
terminated string + * @URL: the base URL to use for the document + * @encoding: the document encoding, or NULL + * @options: a combination of htmlParserOption(s) + * + * parse an HTML in-memory document and build a tree. + * + * Returns the resulting document tree, or NULL on failure + */ +htmlDocPtr +htmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) +{ + htmlParserCtxtPtr ctxt; + + if (cur == NULL) + return (NULL); + + ctxt = xmlCreateDocParserCtxt(cur); /* NOTE(review): XML-side constructor, whereas htmlReadFile() uses an html* one - confirm this is intended */ + if (ctxt == NULL) + return (NULL); + return (htmlDoRead(ctxt, URL, encoding, options, 0)); /* reuse == 0: htmlDoRead() frees ctxt */ +} + +/** + * htmlReadFile: + * @filename: a file or URL + * @encoding: the document encoding, or NULL + * @options: a combination of htmlParserOption(s) + * + * parse an HTML file from the filesystem or the network. + * + * Returns the resulting document tree, or NULL on failure + */ +htmlDocPtr +htmlReadFile(const char *filename, const char *encoding, int options) +{ + htmlParserCtxtPtr ctxt; + + ctxt = htmlCreateFileParserCtxt(filename, encoding); + if (ctxt == NULL) + return (NULL); + return (htmlDoRead(ctxt, NULL, NULL, options, 0)); /* filename and encoding were already given to htmlCreateFileParserCtxt() */ +} + +/** + * htmlReadMemory: + * @buffer: a pointer to a char array + * @size: the size of the array + * @URL: the base URL to use for the document + * @encoding: the document encoding, or NULL + * @options: a combination of htmlParserOption(s) + * + * parse an HTML in-memory document and build a tree. + * + * Returns the resulting document tree, or NULL on failure + */ +htmlDocPtr +htmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) +{ + htmlParserCtxtPtr ctxt; + + ctxt = xmlCreateMemoryParserCtxt(buffer, size); /* NOTE(review): XML-side constructor - confirm an HTML-specific one is not needed */ + if (ctxt == NULL) + return (NULL); + return (htmlDoRead(ctxt, URL, encoding, options, 0)); /* reuse == 0: htmlDoRead() frees ctxt */ +} + +/** + * htmlReadFd: + * @fd: an open file descriptor + * @URL: the base URL to use for the document + * @encoding: the document encoding, or NULL + * @options: a combination of htmlParserOption(s) + * + * parse an HTML document from a file descriptor and build a tree.
+ * + * Returns the resulting document tree, or NULL on failure + */ +htmlDocPtr +htmlReadFd(int fd, const char *URL, const char *encoding, int options) +{ + htmlParserCtxtPtr ctxt; + xmlParserInputBufferPtr input; + xmlParserInputPtr stream; + + if (fd < 0) + return (NULL); + + input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); + if (input == NULL) + return (NULL); + ctxt = xmlNewParserCtxt(); /* NOTE(review): generic XML context; htmlDoRead() only sets ctxt->html = 1 afterwards - confirm that is sufficient */ + if (ctxt == NULL) { + xmlFreeParserInputBuffer(input); + return (NULL); + } + stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); + if (stream == NULL) { + xmlFreeParserInputBuffer(input); + xmlFreeParserCtxt(ctxt); + return (NULL); + } + inputPush(ctxt, stream); + return (htmlDoRead(ctxt, URL, encoding, options, 0)); /* reuse == 0: htmlDoRead() frees ctxt */ +} + +/** + * htmlReadIO: + * @ioread: an I/O read function + * @ioclose: an I/O close function + * @ioctx: an I/O handler + * @URL: the base URL to use for the document + * @encoding: the document encoding, or NULL + * @options: a combination of htmlParserOption(s) + * + * parse an HTML document from I/O functions and source and build a tree.
+ * + * Returns the resulting document tree, or NULL on failure + */ +htmlDocPtr +htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, + void *ioctx, const char *URL, const char *encoding, int options) +{ + htmlParserCtxtPtr ctxt; + xmlParserInputBufferPtr input; + xmlParserInputPtr stream; + + if (ioread == NULL) + return (NULL); + + input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, + XML_CHAR_ENCODING_NONE); + if (input == NULL) + return (NULL); + ctxt = xmlNewParserCtxt(); /* NOTE(review): generic XML context; htmlDoRead() only sets ctxt->html = 1 afterwards - confirm that is sufficient */ + if (ctxt == NULL) { + xmlFreeParserInputBuffer(input); + return (NULL); + } + stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); + if (stream == NULL) { + xmlFreeParserInputBuffer(input); + xmlFreeParserCtxt(ctxt); + return (NULL); + } + inputPush(ctxt, stream); + return (htmlDoRead(ctxt, URL, encoding, options, 0)); /* reuse == 0: htmlDoRead() frees ctxt */ +} + +/** + * htmlCtxtReadDoc: + * @ctxt: an HTML parser context + * @cur: a pointer to a zero terminated string + * @URL: the base URL to use for the document + * @encoding: the document encoding, or NULL + * @options: a combination of htmlParserOption(s) + * + * parse an HTML in-memory document and build a tree. + * This reuses the existing @ctxt parser context + * + * Returns the resulting document tree, or NULL on failure + */ +htmlDocPtr +htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur, + const char *URL, const char *encoding, int options) +{ + xmlParserInputPtr stream; + + if (cur == NULL) + return (NULL); + if (ctxt == NULL) + return (NULL); + + htmlCtxtReset(ctxt); /* discard whatever the previous parse left in @ctxt */ + + stream = xmlNewStringInputStream(ctxt, cur); + if (stream == NULL) { + return (NULL); + } + inputPush(ctxt, stream); + return (htmlDoRead(ctxt, URL, encoding, options, 1)); /* reuse == 1: htmlDoRead() does not free ctxt */ +} + +/** + * htmlCtxtReadFile: + * @ctxt: an HTML parser context + * @filename: a file or URL + * @encoding: the document encoding, or NULL + * @options: a combination of htmlParserOption(s) + * + * parse an HTML file from the filesystem or the network.
+ * This reuses the existing @ctxt parser context + * + * Returns the resulting document tree, or NULL on failure + */ +htmlDocPtr +htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename, + const char *encoding, int options) +{ + xmlParserInputPtr stream; + + if (filename == NULL) + return (NULL); + if (ctxt == NULL) + return (NULL); + + htmlCtxtReset(ctxt); /* discard whatever the previous parse left in @ctxt */ + + stream = xmlNewInputFromFile(ctxt, filename); + if (stream == NULL) { + return (NULL); + } + inputPush(ctxt, stream); + return (htmlDoRead(ctxt, NULL, encoding, options, 1)); /* reuse == 1: htmlDoRead() does not free ctxt */ +} + +/** + * htmlCtxtReadMemory: + * @ctxt: an HTML parser context + * @buffer: a pointer to a char array + * @size: the size of the array + * @URL: the base URL to use for the document + * @encoding: the document encoding, or NULL + * @options: a combination of htmlParserOption(s) + * + * parse an HTML in-memory document and build a tree. + * This reuses the existing @ctxt parser context + * + * Returns the resulting document tree, or NULL on failure + */ +htmlDocPtr +htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size, + const char *URL, const char *encoding, int options) +{ + xmlParserInputBufferPtr input; + xmlParserInputPtr stream; + + if (ctxt == NULL) + return (NULL); + if (buffer == NULL) + return (NULL); + + htmlCtxtReset(ctxt); /* discard whatever the previous parse left in @ctxt */ + + input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); + if (input == NULL) { + return(NULL); + } + + stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); + if (stream == NULL) { + xmlFreeParserInputBuffer(input); + return(NULL); + } + + inputPush(ctxt, stream); + return (htmlDoRead(ctxt, URL, encoding, options, 1)); /* reuse == 1: htmlDoRead() does not free ctxt */ +} + +/** + * htmlCtxtReadFd: + * @ctxt: an HTML parser context + * @fd: an open file descriptor + * @URL: the base URL to use for the document + * @encoding: the document encoding, or NULL + * @options: a combination of htmlParserOption(s) + * + * parse an HTML document from a file descriptor and build a tree.
+ * This reuses the existing @ctxt parser context + * + * Returns the resulting document tree, or NULL on failure + */ +htmlDocPtr +htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd, + const char *URL, const char *encoding, int options) +{ + xmlParserInputBufferPtr input; + xmlParserInputPtr stream; + + if (fd < 0) + return (NULL); + if (ctxt == NULL) + return (NULL); + + htmlCtxtReset(ctxt); /* discard whatever the previous parse left in @ctxt */ + + + input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); + if (input == NULL) + return (NULL); + stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); + if (stream == NULL) { + xmlFreeParserInputBuffer(input); + return (NULL); + } + inputPush(ctxt, stream); + return (htmlDoRead(ctxt, URL, encoding, options, 1)); /* reuse == 1: htmlDoRead() does not free ctxt */ +} + +/** + * htmlCtxtReadIO: + * @ctxt: an HTML parser context + * @ioread: an I/O read function + * @ioclose: an I/O close function + * @ioctx: an I/O handler + * @URL: the base URL to use for the document + * @encoding: the document encoding, or NULL + * @options: a combination of htmlParserOption(s) + * + * parse an HTML document from I/O functions and source and build a tree.
+ * This reuses the existing @ctxt parser context + * + * Returns the resulting document tree, or NULL on failure + */ +htmlDocPtr +htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, + xmlInputCloseCallback ioclose, void *ioctx, + const char *URL, + const char *encoding, int options) +{ + xmlParserInputBufferPtr input; + xmlParserInputPtr stream; + + if (ioread == NULL) + return (NULL); + if (ctxt == NULL) + return (NULL); + + htmlCtxtReset(ctxt); /* discard whatever the previous parse left in @ctxt */ + + input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, + XML_CHAR_ENCODING_NONE); + if (input == NULL) + return (NULL); + stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); + if (stream == NULL) { + xmlFreeParserInputBuffer(input); + return (NULL); + } + inputPush(ctxt, stream); + return (htmlDoRead(ctxt, URL, encoding, options, 1)); /* reuse == 1: htmlDoRead() does not free ctxt */ +} + #endif /* LIBXML_HTML_ENABLED */ diff --git a/include/libxml/HTMLparser.h b/include/libxml/HTMLparser.h index 7e008bd5..66f2d809 100644 --- a/include/libxml/HTMLparser.h +++ b/include/libxml/HTMLparser.h @@ -154,6 +154,88 @@ XMLPUBFUN int XMLCALL int size, int terminate); +/* + * New set of simpler/more flexible APIs + */ +/** + * htmlParserOption: + * + * This is the set of HTML parser options that can be passed down + * to the htmlReadDoc() and similar calls.
+ */ +typedef enum { + HTML_PARSE_NOERROR = 1<<5, /* suppress error reports */ + HTML_PARSE_NOWARNING= 1<<6, /* suppress warning reports */ + HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */ + HTML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */ + HTML_PARSE_NONET = 1<<11 /* Forbid network access */ +} htmlParserOption; + +XMLPUBFUN void XMLCALL + htmlCtxtReset (htmlParserCtxtPtr ctxt); +XMLPUBFUN int XMLCALL + htmlCtxtUseOptions (htmlParserCtxtPtr ctxt, + int options); +XMLPUBFUN htmlDocPtr XMLCALL + htmlReadDoc (const xmlChar *cur, + const char *URL, + const char *encoding, + int options); +XMLPUBFUN htmlDocPtr XMLCALL + htmlReadFile (const char *URL, + const char *encoding, + int options); +XMLPUBFUN htmlDocPtr XMLCALL + htmlReadMemory (const char *buffer, + int size, + const char *URL, + const char *encoding, + int options); +XMLPUBFUN htmlDocPtr XMLCALL + htmlReadFd (int fd, + const char *URL, + const char *encoding, + int options); +XMLPUBFUN htmlDocPtr XMLCALL + htmlReadIO (xmlInputReadCallback ioread, + xmlInputCloseCallback ioclose, + void *ioctx, + const char *URL, + const char *encoding, + int options); +XMLPUBFUN htmlDocPtr XMLCALL + htmlCtxtReadDoc (xmlParserCtxtPtr ctxt, + const xmlChar *cur, + const char *URL, + const char *encoding, + int options); +XMLPUBFUN htmlDocPtr XMLCALL + htmlCtxtReadFile (xmlParserCtxtPtr ctxt, + const char *filename, + const char *encoding, + int options); +XMLPUBFUN htmlDocPtr XMLCALL + htmlCtxtReadMemory (xmlParserCtxtPtr ctxt, + const char *buffer, + int size, + const char *URL, + const char *encoding, + int options); +XMLPUBFUN htmlDocPtr XMLCALL + htmlCtxtReadFd (xmlParserCtxtPtr ctxt, + int fd, + const char *URL, + const char *encoding, + int options); +XMLPUBFUN htmlDocPtr XMLCALL + htmlCtxtReadIO (xmlParserCtxtPtr ctxt, + xmlInputReadCallback ioread, + xmlInputCloseCallback ioclose, + void *ioctx, + const char *URL, + const char *encoding, + int options); + /* NRK/Jan2003: further knowledge of HTML 
structure */ typedef enum { diff --git a/include/libxml/parser.h b/include/libxml/parser.h index e095babd..73dd71f9 100644 --- a/include/libxml/parser.h +++ b/include/libxml/parser.h @@ -262,15 +262,16 @@ struct _xmlParserCtxt { xmlHashTablePtr attsDefault; /* defaulted attributes if any */ xmlHashTablePtr attsSpecial; /* non-CDATA attributes if any */ int nsWellFormed; /* is the document XML Nanespace okay */ + int options; /* Extra options */ /* * Those fields are needed only for treaming parsing so far */ - int dictNames; /* Use dictionary names for the tree */ - int freeElemsNr; /* number of freed element nodes */ - xmlNodePtr freeElems; /* List of freed element nodes */ - int freeAttrsNr; /* number of freed attributes nodes */ - xmlAttrPtr freeAttrs; /* List of freed attributes nodes */ + int dictNames; /* Use dictionary names for the tree */ + int freeElemsNr; /* number of freed element nodes */ + xmlNodePtr freeElems; /* List of freed element nodes */ + int freeAttrsNr; /* number of freed attributes nodes */ + xmlAttrPtr freeAttrs; /* List of freed attributes nodes */ }; /** @@ -1045,7 +1046,9 @@ typedef enum { XML_PARSE_SAX1 = 1<<9, /* use the SAX1 interface internally */ XML_PARSE_XINCLUDE = 1<<10,/* Implement XInclude substitition */ XML_PARSE_NONET = 1<<11,/* Forbid network access */ - XML_PARSE_NODICT = 1<<12 /* Do not reuse the context dictionnary */ + XML_PARSE_NODICT = 1<<12,/* Do not reuse the context dictionary */ + XML_PARSE_NSCLEAN = 1<<13,/* remove redundant namespace declarations */ + XML_PARSE_NOCDATA = 1<<14 /* merge CDATA as text nodes */ } xmlParserOption; XMLPUBFUN void XMLCALL diff --git a/result/cdata2 b/result/cdata2 new file mode 100644 index 00000000..b4db7917 --- /dev/null +++ b/result/cdata2 @@ -0,0 +1,6 @@ + + + ]> + diff --git a/result/cdata2.rdr b/result/cdata2.rdr new file mode 100644 index 00000000..e69a6729 --- /dev/null +++ b/result/cdata2.rdr @@ -0,0 +1,13 @@ +0 1 collection 0 0 +1 14 #text 0 1 + +1 1 test 0 0 +2 4 
#cdata-section 0 1 + +2 4 #cdata-section 0 1 + +1 15 test 0 0 +1 14 #text 0 1 + +0 15 collection 0 0 diff --git a/result/cdata2.sax b/result/cdata2.sax new file mode 100644 index 00000000..46b025e8 --- /dev/null +++ b/result/cdata2.sax @@ -0,0 +1,18 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.startElement(collection) +SAX.characters( + , 3) +SAX.startElement(test) +SAX.pcdata( + , 1) +SAX.pcdata( + , 3) +SAX.endElement(test) +SAX.characters( +, 1) +SAX.endElement(collection) +SAX.endDocument() diff --git a/result/noent/cdata2 b/result/noent/cdata2 new file mode 100644 index 00000000..b4db7917 --- /dev/null +++ b/result/noent/cdata2 @@ -0,0 +1,6 @@ + + + ]> + diff --git a/test/cdata2 b/test/cdata2 new file mode 100644 index 00000000..b4db7917 --- /dev/null +++ b/test/cdata2 @@ -0,0 +1,6 @@ + + + ]> + diff --git a/testHTML.c b/testHTML.c index f48612d8..81233248 100644 --- a/testHTML.c +++ b/testHTML.c @@ -46,6 +46,7 @@ static int repeat = 0; static int noout = 0; static int push = 0; static char *encoding = NULL; +static int options = 0; xmlSAXHandler emptySAXHandlerStruct = { NULL, /* internalSubset */ @@ -725,7 +726,7 @@ parseAndPrintFile(char *filename) { fclose(f); } } else { - doc = htmlParseFile(filename, NULL); + doc = htmlReadFile(filename, NULL, options); } if (doc == NULL) { xmlGenericError(xmlGenericErrorContext, diff --git a/tree.c b/tree.c index 4eed575a..4b7ef5ee 100644 --- a/tree.c +++ b/tree.c @@ -7273,6 +7273,7 @@ xmlNodeDumpOutputInternal(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int level, int format, const char *encoding) { int i; xmlNodePtr tmp; + xmlChar *start, *end; if (cur == NULL) { #ifdef DEBUG_TREE @@ -7356,10 +7357,22 @@ xmlNodeDumpOutputInternal(xmlOutputBufferPtr buf, xmlDocPtr doc, return; } if (cur->type == XML_CDATA_SECTION_NODE) { - xmlOutputBufferWriteString(buf, "content != NULL) - xmlOutputBufferWriteString(buf, (const char *)cur->content); - xmlOutputBufferWriteString(buf, "]]>"); + start = end = 
cur->content; /* may be NULL: the scan below is guarded */ + while ((end != NULL) && (*end != '\0')) { + if ((*end == ']') && (*(end + 1) == ']') && (*(end + 2) == '>')) { + end = end + 2; + xmlOutputBufferWriteString(buf, ""); + start = end; + } + end++; + } + if (start != end) { + xmlOutputBufferWriteString(buf, ""); + } return; } if (cur->type == XML_ATTRIBUTE_NODE) { @@ -7810,6 +7823,7 @@ xhtmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int level, int format, const char *encoding) { int i; xmlNodePtr tmp; + xmlChar *start, *end; if (cur == NULL) { #ifdef DEBUG_TREE @@ -7893,10 +7907,22 @@ xhtmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, return; } if (cur->type == XML_CDATA_SECTION_NODE) { - xmlOutputBufferWriteString(buf, "content != NULL) - xmlOutputBufferWriteString(buf, (const char *)cur->content); - xmlOutputBufferWriteString(buf, "]]>"); + start = end = cur->content; /* may be NULL: the scan below is guarded */ + while ((end != NULL) && (*end != '\0')) { + if (*end == ']' && *(end + 1) == ']' && *(end + 2) == '>') { + end = end + 2; + xmlOutputBufferWriteString(buf, ""); + start = end; + } + end++; + } + if (start != end) { + xmlOutputBufferWriteString(buf, ""); + } return; } @@ -7989,11 +8015,25 @@ xhtmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, (xmlStrchr(child->content, '&') == NULL)) { xhtmlNodeDumpOutput(buf, doc, child, 0, 0, encoding); } else { - xmlOutputBufferWriteString(buf, "content != NULL) - xmlOutputBufferWriteString(buf, - (const char *)child->content); - xmlOutputBufferWriteString(buf, "]]>"); + start = end = child->content; /* may be NULL: the scan below is guarded */ + while ((end != NULL) && (*end != '\0')) { + if (*end == ']' && + *(end + 1) == ']' && + *(end + 2) == '>') { + end = end + 2; + xmlOutputBufferWriteString(buf, ""); + start = end; + } + end++; + } + if (start != end) { + xmlOutputBufferWriteString(buf, ""); + } } } else { xhtmlNodeDumpOutput(buf, doc, child, 0, 0, encoding); diff --git a/xmllint.c b/xmllint.c index bd835d12..9ef1905f 100644 --- a/xmllint.c +++ b/xmllint.c @@ -764,7 +764,7 @@ static void parseAndPrintFile(char 
*filename, xmlParserCtxtPtr rectxt) { } } else if (html) { - doc = htmlParseFile(filename, NULL); + doc = htmlReadFile(filename, NULL, options); } #endif /* LIBXML_HTML_ENABLED */ else {