mirror of
https://github.com/GNOME/libxml2.git
synced 2025-03-13 18:47:01 +08:00
added the same htmlRead APIs than their XML counterparts new parser
* HTMLparser.c testHTML.c xmllint.c include/libxml/HTMLparser.h: added the same htmlRead APIs than their XML counterparts * include/libxml/parser.h: new parser options, not yet implemented, added an options field to the context. * tree.c: patch from Shaun McCance to fix bug #123238 when ]]> is found within a cdata section. * result/noent/cdata2 result/cdata2 result/cdata2.rdr result/cdata2.sax test/cdata2: add one more cdata test Daniel
This commit is contained in:
parent
60942def6a
commit
9475a352bd
11
ChangeLog
11
ChangeLog
@ -1,3 +1,14 @@
|
||||
Fri Sep 26 14:41:53 CEST 2003 Daniel Veillard <daniel@veillard.com>
|
||||
|
||||
* HTMLparser.c testHTML.c xmllint.c include/libxml/HTMLparser.h:
|
||||
added the same htmlRead APIs than their XML counterparts
|
||||
* include/libxml/parser.h: new parser options, not yet implemented,
|
||||
added an options field to the context.
|
||||
* tree.c: patch from Shaun McCance to fix bug #123238 when ]]>
|
||||
is found within a cdata section.
|
||||
* result/noent/cdata2 result/cdata2 result/cdata2.rdr
|
||||
result/cdata2.sax test/cdata2: add one more cdata test
|
||||
|
||||
Thu Sep 25 23:03:23 CEST 2003 Daniel Veillard <daniel@veillard.com>
|
||||
|
||||
* parser.c xmllint.c doc/libxml2-api.xml include/libxml/parser.h:
|
||||
|
525
HTMLparser.c
525
HTMLparser.c
@ -5541,4 +5541,529 @@ htmlNodeStatus(const htmlNodePtr node, int legacy) {
|
||||
default: return HTML_NA ;
|
||||
}
|
||||
}
|
||||
/************************************************************************
|
||||
* *
|
||||
* New set (2.6.0) of simpler and more flexible APIs *
|
||||
* *
|
||||
************************************************************************/
|
||||
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope.
 *
 * The expansion refers to a variable named "dict" that must be visible
 * where the macro is used; when that variable is NULL the string is
 * always freed.  @str is evaluated more than once, so it must not have
 * side effects.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and is safe inside an unbraced if/else; the
 * caller supplies the terminating semicolon.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
|
||||
|
||||
/**
|
||||
* htmlCtxtReset:
|
||||
* @ctxt: an XML parser context
|
||||
*
|
||||
* Reset a parser context
|
||||
*/
|
||||
void
|
||||
htmlCtxtReset(htmlParserCtxtPtr ctxt)
|
||||
{
|
||||
xmlParserInputPtr input;
|
||||
xmlDictPtr dict = ctxt->dict;
|
||||
|
||||
while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
|
||||
xmlFreeInputStream(input);
|
||||
}
|
||||
ctxt->inputNr = 0;
|
||||
ctxt->input = NULL;
|
||||
|
||||
ctxt->spaceNr = 0;
|
||||
ctxt->spaceTab[0] = -1;
|
||||
ctxt->space = &ctxt->spaceTab[0];
|
||||
|
||||
|
||||
ctxt->nodeNr = 0;
|
||||
ctxt->node = NULL;
|
||||
|
||||
ctxt->nameNr = 0;
|
||||
ctxt->name = NULL;
|
||||
|
||||
DICT_FREE(ctxt->version);
|
||||
ctxt->version = NULL;
|
||||
DICT_FREE(ctxt->encoding);
|
||||
ctxt->encoding = NULL;
|
||||
DICT_FREE(ctxt->directory);
|
||||
ctxt->directory = NULL;
|
||||
DICT_FREE(ctxt->extSubURI);
|
||||
ctxt->extSubURI = NULL;
|
||||
DICT_FREE(ctxt->extSubSystem);
|
||||
ctxt->extSubSystem = NULL;
|
||||
if (ctxt->myDoc != NULL)
|
||||
xmlFreeDoc(ctxt->myDoc);
|
||||
ctxt->myDoc = NULL;
|
||||
|
||||
ctxt->standalone = -1;
|
||||
ctxt->hasExternalSubset = 0;
|
||||
ctxt->hasPErefs = 0;
|
||||
ctxt->html = 1;
|
||||
ctxt->external = 0;
|
||||
ctxt->instate = XML_PARSER_START;
|
||||
ctxt->token = 0;
|
||||
|
||||
ctxt->wellFormed = 1;
|
||||
ctxt->nsWellFormed = 1;
|
||||
ctxt->valid = 1;
|
||||
ctxt->vctxt.userData = ctxt;
|
||||
ctxt->vctxt.error = xmlParserValidityError;
|
||||
ctxt->vctxt.warning = xmlParserValidityWarning;
|
||||
ctxt->record_info = 0;
|
||||
ctxt->nbChars = 0;
|
||||
ctxt->checkIndex = 0;
|
||||
ctxt->inSubset = 0;
|
||||
ctxt->errNo = XML_ERR_OK;
|
||||
ctxt->depth = 0;
|
||||
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||
ctxt->catalogs = NULL;
|
||||
xmlInitNodeInfoSeq(&ctxt->node_seq);
|
||||
|
||||
if (ctxt->attsDefault != NULL) {
|
||||
xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
|
||||
ctxt->attsDefault = NULL;
|
||||
}
|
||||
if (ctxt->attsSpecial != NULL) {
|
||||
xmlHashFree(ctxt->attsSpecial, NULL);
|
||||
ctxt->attsSpecial = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* htmlCtxtUseOptions:
|
||||
* @ctxt: an HTML parser context
|
||||
* @options: a combination of htmlParserOption(s)
|
||||
*
|
||||
* Applies the options to the parser context
|
||||
*
|
||||
* Returns 0 in case of success, the set of unknown or unimplemented options
|
||||
* in case of error.
|
||||
*/
|
||||
int
|
||||
htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options)
|
||||
{
|
||||
if (options & HTML_PARSE_NOWARNING) {
|
||||
ctxt->sax->warning = NULL;
|
||||
options -= XML_PARSE_NOWARNING;
|
||||
}
|
||||
if (options & HTML_PARSE_NOERROR) {
|
||||
ctxt->sax->error = NULL;
|
||||
ctxt->sax->fatalError = NULL;
|
||||
options -= XML_PARSE_NOERROR;
|
||||
}
|
||||
if (options & HTML_PARSE_PEDANTIC) {
|
||||
ctxt->pedantic = 1;
|
||||
options -= XML_PARSE_PEDANTIC;
|
||||
} else
|
||||
ctxt->pedantic = 0;
|
||||
if (options & XML_PARSE_NOBLANKS) {
|
||||
ctxt->keepBlanks = 0;
|
||||
ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
|
||||
options -= XML_PARSE_NOBLANKS;
|
||||
} else
|
||||
ctxt->keepBlanks = 1;
|
||||
ctxt->dictNames = 0;
|
||||
return (options);
|
||||
}
|
||||
|
||||
/**
|
||||
* htmlDoRead:
|
||||
* @ctxt: an HTML parser context
|
||||
* @URL: the base URL to use for the document
|
||||
* @encoding: the document encoding, or NULL
|
||||
* @options: a combination of htmlParserOption(s)
|
||||
* @reuse: keep the context for reuse
|
||||
*
|
||||
* Common front-end for the htmlRead functions
|
||||
*
|
||||
* Returns the resulting document tree or NULL
|
||||
*/
|
||||
static htmlDocPtr
|
||||
htmlDoRead(htmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
|
||||
int options, int reuse)
|
||||
{
|
||||
htmlDocPtr ret;
|
||||
|
||||
htmlCtxtUseOptions(ctxt, options);
|
||||
ctxt->html = 1;
|
||||
if (encoding != NULL) {
|
||||
xmlCharEncodingHandlerPtr hdlr;
|
||||
|
||||
hdlr = xmlFindCharEncodingHandler(encoding);
|
||||
if (hdlr != NULL)
|
||||
xmlSwitchToEncoding(ctxt, hdlr);
|
||||
}
|
||||
if ((URL != NULL) && (ctxt->input != NULL) &&
|
||||
(ctxt->input->filename == NULL))
|
||||
ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
|
||||
htmlParseDocument(ctxt);
|
||||
ret = ctxt->myDoc;
|
||||
ctxt->myDoc = NULL;
|
||||
if (!reuse) {
|
||||
if ((ctxt->dictNames) &&
|
||||
(ret != NULL) &&
|
||||
(ret->dict == ctxt->dict))
|
||||
ctxt->dict = NULL;
|
||||
xmlFreeParserCtxt(ctxt);
|
||||
} else {
|
||||
/* Must duplicate the reference to the dictionary */
|
||||
if ((ctxt->dictNames) &&
|
||||
(ret != NULL) &&
|
||||
(ret->dict == ctxt->dict))
|
||||
xmlDictReference(ctxt->dict);
|
||||
}
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/**
|
||||
* htmlReadDoc:
|
||||
* @cur: a pointer to a zero terminated string
|
||||
* @URL: the base URL to use for the document
|
||||
* @encoding: the document encoding, or NULL
|
||||
* @options: a combination of htmlParserOption(s)
|
||||
*
|
||||
* parse an XML in-memory document and build a tree.
|
||||
*
|
||||
* Returns the resulting document tree
|
||||
*/
|
||||
htmlDocPtr
|
||||
htmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
|
||||
{
|
||||
htmlParserCtxtPtr ctxt;
|
||||
|
||||
if (cur == NULL)
|
||||
return (NULL);
|
||||
|
||||
ctxt = xmlCreateDocParserCtxt(cur);
|
||||
if (ctxt == NULL)
|
||||
return (NULL);
|
||||
return (htmlDoRead(ctxt, URL, encoding, options, 0));
|
||||
}
|
||||
|
||||
/**
|
||||
* htmlReadFile:
|
||||
* @filename: a file or URL
|
||||
* @encoding: the document encoding, or NULL
|
||||
* @options: a combination of htmlParserOption(s)
|
||||
*
|
||||
* parse an XML file from the filesystem or the network.
|
||||
*
|
||||
* Returns the resulting document tree
|
||||
*/
|
||||
htmlDocPtr
|
||||
htmlReadFile(const char *filename, const char *encoding, int options)
|
||||
{
|
||||
htmlParserCtxtPtr ctxt;
|
||||
|
||||
ctxt = htmlCreateFileParserCtxt(filename, encoding);
|
||||
if (ctxt == NULL)
|
||||
return (NULL);
|
||||
return (htmlDoRead(ctxt, NULL, NULL, options, 0));
|
||||
}
|
||||
|
||||
/**
|
||||
* htmlReadMemory:
|
||||
* @buffer: a pointer to a char array
|
||||
* @size: the size of the array
|
||||
* @URL: the base URL to use for the document
|
||||
* @encoding: the document encoding, or NULL
|
||||
* @options: a combination of htmlParserOption(s)
|
||||
*
|
||||
* parse an XML in-memory document and build a tree.
|
||||
*
|
||||
* Returns the resulting document tree
|
||||
*/
|
||||
htmlDocPtr
|
||||
htmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
|
||||
{
|
||||
htmlParserCtxtPtr ctxt;
|
||||
|
||||
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
|
||||
if (ctxt == NULL)
|
||||
return (NULL);
|
||||
return (htmlDoRead(ctxt, URL, encoding, options, 0));
|
||||
}
|
||||
|
||||
/**
|
||||
* htmlReadFd:
|
||||
* @fd: an open file descriptor
|
||||
* @URL: the base URL to use for the document
|
||||
* @encoding: the document encoding, or NULL
|
||||
* @options: a combination of htmlParserOption(s)
|
||||
*
|
||||
* parse an XML from a file descriptor and build a tree.
|
||||
*
|
||||
* Returns the resulting document tree
|
||||
*/
|
||||
htmlDocPtr
|
||||
htmlReadFd(int fd, const char *URL, const char *encoding, int options)
|
||||
{
|
||||
htmlParserCtxtPtr ctxt;
|
||||
xmlParserInputBufferPtr input;
|
||||
xmlParserInputPtr stream;
|
||||
|
||||
if (fd < 0)
|
||||
return (NULL);
|
||||
|
||||
input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
|
||||
if (input == NULL)
|
||||
return (NULL);
|
||||
ctxt = xmlNewParserCtxt();
|
||||
if (ctxt == NULL) {
|
||||
xmlFreeParserInputBuffer(input);
|
||||
return (NULL);
|
||||
}
|
||||
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
|
||||
if (stream == NULL) {
|
||||
xmlFreeParserInputBuffer(input);
|
||||
xmlFreeParserCtxt(ctxt);
|
||||
return (NULL);
|
||||
}
|
||||
inputPush(ctxt, stream);
|
||||
return (htmlDoRead(ctxt, URL, encoding, options, 0));
|
||||
}
|
||||
|
||||
/**
|
||||
* htmlReadIO:
|
||||
* @ioread: an I/O read function
|
||||
* @ioclose: an I/O close function
|
||||
* @ioctx: an I/O handler
|
||||
* @URL: the base URL to use for the document
|
||||
* @encoding: the document encoding, or NULL
|
||||
* @options: a combination of htmlParserOption(s)
|
||||
*
|
||||
* parse an HTML document from I/O functions and source and build a tree.
|
||||
*
|
||||
* Returns the resulting document tree
|
||||
*/
|
||||
htmlDocPtr
|
||||
htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
|
||||
void *ioctx, const char *URL, const char *encoding, int options)
|
||||
{
|
||||
htmlParserCtxtPtr ctxt;
|
||||
xmlParserInputBufferPtr input;
|
||||
xmlParserInputPtr stream;
|
||||
|
||||
if (ioread == NULL)
|
||||
return (NULL);
|
||||
|
||||
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
|
||||
XML_CHAR_ENCODING_NONE);
|
||||
if (input == NULL)
|
||||
return (NULL);
|
||||
ctxt = xmlNewParserCtxt();
|
||||
if (ctxt == NULL) {
|
||||
xmlFreeParserInputBuffer(input);
|
||||
return (NULL);
|
||||
}
|
||||
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
|
||||
if (stream == NULL) {
|
||||
xmlFreeParserInputBuffer(input);
|
||||
xmlFreeParserCtxt(ctxt);
|
||||
return (NULL);
|
||||
}
|
||||
inputPush(ctxt, stream);
|
||||
return (htmlDoRead(ctxt, URL, encoding, options, 0));
|
||||
}
|
||||
|
||||
/**
|
||||
* htmlCtxtReadDoc:
|
||||
* @ctxt: an HTML parser context
|
||||
* @cur: a pointer to a zero terminated string
|
||||
* @URL: the base URL to use for the document
|
||||
* @encoding: the document encoding, or NULL
|
||||
* @options: a combination of htmlParserOption(s)
|
||||
*
|
||||
* parse an XML in-memory document and build a tree.
|
||||
* This reuses the existing @ctxt parser context
|
||||
*
|
||||
* Returns the resulting document tree
|
||||
*/
|
||||
htmlDocPtr
|
||||
htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur,
|
||||
const char *URL, const char *encoding, int options)
|
||||
{
|
||||
xmlParserInputPtr stream;
|
||||
|
||||
if (cur == NULL)
|
||||
return (NULL);
|
||||
if (ctxt == NULL)
|
||||
return (NULL);
|
||||
|
||||
htmlCtxtReset(ctxt);
|
||||
|
||||
stream = xmlNewStringInputStream(ctxt, cur);
|
||||
if (stream == NULL) {
|
||||
return (NULL);
|
||||
}
|
||||
inputPush(ctxt, stream);
|
||||
return (htmlDoRead(ctxt, URL, encoding, options, 1));
|
||||
}
|
||||
|
||||
/**
|
||||
* htmlCtxtReadFile:
|
||||
* @ctxt: an HTML parser context
|
||||
* @filename: a file or URL
|
||||
* @encoding: the document encoding, or NULL
|
||||
* @options: a combination of htmlParserOption(s)
|
||||
*
|
||||
* parse an XML file from the filesystem or the network.
|
||||
* This reuses the existing @ctxt parser context
|
||||
*
|
||||
* Returns the resulting document tree
|
||||
*/
|
||||
htmlDocPtr
|
||||
htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename,
|
||||
const char *encoding, int options)
|
||||
{
|
||||
xmlParserInputPtr stream;
|
||||
|
||||
if (filename == NULL)
|
||||
return (NULL);
|
||||
if (ctxt == NULL)
|
||||
return (NULL);
|
||||
|
||||
htmlCtxtReset(ctxt);
|
||||
|
||||
stream = xmlNewInputFromFile(ctxt, filename);
|
||||
if (stream == NULL) {
|
||||
return (NULL);
|
||||
}
|
||||
inputPush(ctxt, stream);
|
||||
return (htmlDoRead(ctxt, NULL, encoding, options, 1));
|
||||
}
|
||||
|
||||
/**
|
||||
* htmlCtxtReadMemory:
|
||||
* @ctxt: an HTML parser context
|
||||
* @buffer: a pointer to a char array
|
||||
* @size: the size of the array
|
||||
* @URL: the base URL to use for the document
|
||||
* @encoding: the document encoding, or NULL
|
||||
* @options: a combination of htmlParserOption(s)
|
||||
*
|
||||
* parse an XML in-memory document and build a tree.
|
||||
* This reuses the existing @ctxt parser context
|
||||
*
|
||||
* Returns the resulting document tree
|
||||
*/
|
||||
htmlDocPtr
|
||||
htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size,
|
||||
const char *URL, const char *encoding, int options)
|
||||
{
|
||||
xmlParserInputBufferPtr input;
|
||||
xmlParserInputPtr stream;
|
||||
|
||||
if (ctxt == NULL)
|
||||
return (NULL);
|
||||
if (buffer == NULL)
|
||||
return (NULL);
|
||||
|
||||
htmlCtxtReset(ctxt);
|
||||
|
||||
input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
|
||||
if (input == NULL) {
|
||||
return(NULL);
|
||||
}
|
||||
|
||||
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
|
||||
if (stream == NULL) {
|
||||
xmlFreeParserInputBuffer(input);
|
||||
return(NULL);
|
||||
}
|
||||
|
||||
inputPush(ctxt, stream);
|
||||
return (htmlDoRead(ctxt, URL, encoding, options, 1));
|
||||
}
|
||||
|
||||
/**
|
||||
* htmlCtxtReadFd:
|
||||
* @ctxt: an HTML parser context
|
||||
* @fd: an open file descriptor
|
||||
* @URL: the base URL to use for the document
|
||||
* @encoding: the document encoding, or NULL
|
||||
* @options: a combination of htmlParserOption(s)
|
||||
*
|
||||
* parse an XML from a file descriptor and build a tree.
|
||||
* This reuses the existing @ctxt parser context
|
||||
*
|
||||
* Returns the resulting document tree
|
||||
*/
|
||||
htmlDocPtr
|
||||
htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd,
|
||||
const char *URL, const char *encoding, int options)
|
||||
{
|
||||
xmlParserInputBufferPtr input;
|
||||
xmlParserInputPtr stream;
|
||||
|
||||
if (fd < 0)
|
||||
return (NULL);
|
||||
if (ctxt == NULL)
|
||||
return (NULL);
|
||||
|
||||
htmlCtxtReset(ctxt);
|
||||
|
||||
|
||||
input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
|
||||
if (input == NULL)
|
||||
return (NULL);
|
||||
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
|
||||
if (stream == NULL) {
|
||||
xmlFreeParserInputBuffer(input);
|
||||
return (NULL);
|
||||
}
|
||||
inputPush(ctxt, stream);
|
||||
return (htmlDoRead(ctxt, URL, encoding, options, 1));
|
||||
}
|
||||
|
||||
/**
|
||||
* htmlCtxtReadIO:
|
||||
* @ctxt: an HTML parser context
|
||||
* @ioread: an I/O read function
|
||||
* @ioclose: an I/O close function
|
||||
* @ioctx: an I/O handler
|
||||
* @URL: the base URL to use for the document
|
||||
* @encoding: the document encoding, or NULL
|
||||
* @options: a combination of htmlParserOption(s)
|
||||
*
|
||||
* parse an HTML document from I/O functions and source and build a tree.
|
||||
* This reuses the existing @ctxt parser context
|
||||
*
|
||||
* Returns the resulting document tree
|
||||
*/
|
||||
htmlDocPtr
|
||||
htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
|
||||
xmlInputCloseCallback ioclose, void *ioctx,
|
||||
const char *URL,
|
||||
const char *encoding, int options)
|
||||
{
|
||||
xmlParserInputBufferPtr input;
|
||||
xmlParserInputPtr stream;
|
||||
|
||||
if (ioread == NULL)
|
||||
return (NULL);
|
||||
if (ctxt == NULL)
|
||||
return (NULL);
|
||||
|
||||
htmlCtxtReset(ctxt);
|
||||
|
||||
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
|
||||
XML_CHAR_ENCODING_NONE);
|
||||
if (input == NULL)
|
||||
return (NULL);
|
||||
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
|
||||
if (stream == NULL) {
|
||||
xmlFreeParserInputBuffer(input);
|
||||
return (NULL);
|
||||
}
|
||||
inputPush(ctxt, stream);
|
||||
return (htmlDoRead(ctxt, URL, encoding, options, 1));
|
||||
}
|
||||
|
||||
#endif /* LIBXML_HTML_ENABLED */
|
||||
|
@ -154,6 +154,88 @@ XMLPUBFUN int XMLCALL
|
||||
int size,
|
||||
int terminate);
|
||||
|
||||
/*
|
||||
* New set of simpler/more flexible APIs
|
||||
*/
|
||||
/**
 * htmlParserOption:
 *
 * This is the set of HTML parser options that can be passed down
 * to the htmlReadDoc() and similar calls.
 * The values are individual bits meant to be OR-ed together.
 */
typedef enum {
    HTML_PARSE_NOERROR   = 1<<5,  /* suppress error reports */
    HTML_PARSE_NOWARNING = 1<<6,  /* suppress warning reports */
    HTML_PARSE_PEDANTIC  = 1<<7,  /* pedantic error reporting */
    HTML_PARSE_NOBLANKS  = 1<<8,  /* remove blank nodes */
    HTML_PARSE_NONET     = 1<<11  /* Forbid network access */
} htmlParserOption;
|
||||
|
||||
XMLPUBFUN void XMLCALL
|
||||
htmlCtxtReset (htmlParserCtxtPtr ctxt);
|
||||
XMLPUBFUN int XMLCALL
|
||||
htmlCtxtUseOptions (htmlParserCtxtPtr ctxt,
|
||||
int options);
|
||||
XMLPUBFUN htmlDocPtr XMLCALL
|
||||
htmlReadDoc (const xmlChar *cur,
|
||||
const char *URL,
|
||||
const char *encoding,
|
||||
int options);
|
||||
XMLPUBFUN htmlDocPtr XMLCALL
|
||||
htmlReadFile (const char *URL,
|
||||
const char *encoding,
|
||||
int options);
|
||||
XMLPUBFUN htmlDocPtr XMLCALL
|
||||
htmlReadMemory (const char *buffer,
|
||||
int size,
|
||||
const char *URL,
|
||||
const char *encoding,
|
||||
int options);
|
||||
XMLPUBFUN htmlDocPtr XMLCALL
|
||||
htmlReadFd (int fd,
|
||||
const char *URL,
|
||||
const char *encoding,
|
||||
int options);
|
||||
XMLPUBFUN htmlDocPtr XMLCALL
|
||||
htmlReadIO (xmlInputReadCallback ioread,
|
||||
xmlInputCloseCallback ioclose,
|
||||
void *ioctx,
|
||||
const char *URL,
|
||||
const char *encoding,
|
||||
int options);
|
||||
XMLPUBFUN htmlDocPtr XMLCALL
|
||||
htmlCtxtReadDoc (xmlParserCtxtPtr ctxt,
|
||||
const xmlChar *cur,
|
||||
const char *URL,
|
||||
const char *encoding,
|
||||
int options);
|
||||
XMLPUBFUN htmlDocPtr XMLCALL
|
||||
htmlCtxtReadFile (xmlParserCtxtPtr ctxt,
|
||||
const char *filename,
|
||||
const char *encoding,
|
||||
int options);
|
||||
XMLPUBFUN htmlDocPtr XMLCALL
|
||||
htmlCtxtReadMemory (xmlParserCtxtPtr ctxt,
|
||||
const char *buffer,
|
||||
int size,
|
||||
const char *URL,
|
||||
const char *encoding,
|
||||
int options);
|
||||
XMLPUBFUN htmlDocPtr XMLCALL
|
||||
htmlCtxtReadFd (xmlParserCtxtPtr ctxt,
|
||||
int fd,
|
||||
const char *URL,
|
||||
const char *encoding,
|
||||
int options);
|
||||
XMLPUBFUN htmlDocPtr XMLCALL
|
||||
htmlCtxtReadIO (xmlParserCtxtPtr ctxt,
|
||||
xmlInputReadCallback ioread,
|
||||
xmlInputCloseCallback ioclose,
|
||||
void *ioctx,
|
||||
const char *URL,
|
||||
const char *encoding,
|
||||
int options);
|
||||
|
||||
/* NRK/Jan2003: further knowledge of HTML structure
|
||||
*/
|
||||
typedef enum {
|
||||
|
@ -262,15 +262,16 @@ struct _xmlParserCtxt {
|
||||
xmlHashTablePtr attsDefault; /* defaulted attributes if any */
|
||||
xmlHashTablePtr attsSpecial; /* non-CDATA attributes if any */
|
||||
int nsWellFormed; /* is the document XML Nanespace okay */
|
||||
int options; /* Extra options */
|
||||
|
||||
/*
|
||||
* Those fields are needed only for treaming parsing so far
|
||||
*/
|
||||
int dictNames; /* Use dictionary names for the tree */
|
||||
int freeElemsNr; /* number of freed element nodes */
|
||||
xmlNodePtr freeElems; /* List of freed element nodes */
|
||||
int freeAttrsNr; /* number of freed attributes nodes */
|
||||
xmlAttrPtr freeAttrs; /* List of freed attributes nodes */
|
||||
int dictNames; /* Use dictionary names for the tree */
|
||||
int freeElemsNr; /* number of freed element nodes */
|
||||
xmlNodePtr freeElems; /* List of freed element nodes */
|
||||
int freeAttrsNr; /* number of freed attributes nodes */
|
||||
xmlAttrPtr freeAttrs; /* List of freed attributes nodes */
|
||||
};
|
||||
|
||||
/**
|
||||
@ -1045,7 +1046,9 @@ typedef enum {
|
||||
XML_PARSE_SAX1 = 1<<9, /* use the SAX1 interface internally */
|
||||
XML_PARSE_XINCLUDE = 1<<10,/* Implement XInclude substitition */
|
||||
XML_PARSE_NONET = 1<<11,/* Forbid network access */
|
||||
XML_PARSE_NODICT = 1<<12 /* Do not reuse the context dictionnary */
|
||||
XML_PARSE_NODICT = 1<<12,/* Do not reuse the context dictionnary */
|
||||
XML_PARSE_NSCLEAN = 1<<13,/* remove redundant namespaces declarations */
|
||||
XML_PARSE_NOCDATA = 1<<14 /* merge CDATA as text nodes */
|
||||
} xmlParserOption;
|
||||
|
||||
XMLPUBFUN void XMLCALL
|
||||
|
6
result/cdata2
Normal file
6
result/cdata2
Normal file
@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<collection>
|
||||
<test><![CDATA[
|
||||
<![CDATA[abc]]]>]><![CDATA[
|
||||
]]></test>
|
||||
</collection>
|
13
result/cdata2.rdr
Normal file
13
result/cdata2.rdr
Normal file
@ -0,0 +1,13 @@
|
||||
0 1 collection 0 0
|
||||
1 14 #text 0 1
|
||||
|
||||
1 1 test 0 0
|
||||
2 4 #cdata-section 0 1
|
||||
<![CDATA[abc]
|
||||
2 3 #text 0 1 ]>
|
||||
2 4 #cdata-section 0 1
|
||||
|
||||
1 15 test 0 0
|
||||
1 14 #text 0 1
|
||||
|
||||
0 15 collection 0 0
|
18
result/cdata2.sax
Normal file
18
result/cdata2.sax
Normal file
@ -0,0 +1,18 @@
|
||||
SAX.setDocumentLocator()
|
||||
SAX.startDocument()
|
||||
SAX.startElement(collection)
|
||||
SAX.characters(
|
||||
, 3)
|
||||
SAX.startElement(test)
|
||||
SAX.pcdata(
|
||||
<![CDATA[abc], 18)
|
||||
SAX.characters(], 1)
|
||||
SAX.getEntity(gt)
|
||||
SAX.characters(>, 1)
|
||||
SAX.pcdata(
|
||||
, 3)
|
||||
SAX.endElement(test)
|
||||
SAX.characters(
|
||||
, 1)
|
||||
SAX.endElement(collection)
|
||||
SAX.endDocument()
|
6
result/noent/cdata2
Normal file
6
result/noent/cdata2
Normal file
@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<collection>
|
||||
<test><![CDATA[
|
||||
<![CDATA[abc]]]>]><![CDATA[
|
||||
]]></test>
|
||||
</collection>
|
6
test/cdata2
Normal file
6
test/cdata2
Normal file
@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<collection>
|
||||
<test><![CDATA[
|
||||
<![CDATA[abc]]]>]><![CDATA[
|
||||
]]></test>
|
||||
</collection>
|
@ -46,6 +46,7 @@ static int repeat = 0;
|
||||
static int noout = 0;
|
||||
static int push = 0;
|
||||
static char *encoding = NULL;
|
||||
static int options = 0;
|
||||
|
||||
xmlSAXHandler emptySAXHandlerStruct = {
|
||||
NULL, /* internalSubset */
|
||||
@ -725,7 +726,7 @@ parseAndPrintFile(char *filename) {
|
||||
fclose(f);
|
||||
}
|
||||
} else {
|
||||
doc = htmlParseFile(filename, NULL);
|
||||
doc = htmlReadFile(filename, NULL, options);
|
||||
}
|
||||
if (doc == NULL) {
|
||||
xmlGenericError(xmlGenericErrorContext,
|
||||
|
66
tree.c
66
tree.c
@ -7273,6 +7273,7 @@ xmlNodeDumpOutputInternal(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
||||
xmlNodePtr cur, int level, int format, const char *encoding) {
|
||||
int i;
|
||||
xmlNodePtr tmp;
|
||||
xmlChar *start, *end;
|
||||
|
||||
if (cur == NULL) {
|
||||
#ifdef DEBUG_TREE
|
||||
@ -7356,10 +7357,22 @@ xmlNodeDumpOutputInternal(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
||||
return;
|
||||
}
|
||||
if (cur->type == XML_CDATA_SECTION_NODE) {
|
||||
xmlOutputBufferWriteString(buf, "<![CDATA[");
|
||||
if (cur->content != NULL)
|
||||
xmlOutputBufferWriteString(buf, (const char *)cur->content);
|
||||
xmlOutputBufferWriteString(buf, "]]>");
|
||||
start = end = cur->content;
|
||||
while (*end != '\0') {
|
||||
if ((*end == ']') && (*(end + 1) == ']') && (*(end + 2) == '>')) {
|
||||
end = end + 2;
|
||||
xmlOutputBufferWriteString(buf, "<![CDATA[");
|
||||
xmlOutputBufferWrite(buf, end - start, (const char *)start);
|
||||
xmlOutputBufferWriteString(buf, "]]>");
|
||||
start = end;
|
||||
}
|
||||
end++;
|
||||
}
|
||||
if (start != end) {
|
||||
xmlOutputBufferWriteString(buf, "<![CDATA[");
|
||||
xmlOutputBufferWriteString(buf, (const char *)start);
|
||||
xmlOutputBufferWriteString(buf, "]]>");
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (cur->type == XML_ATTRIBUTE_NODE) {
|
||||
@ -7810,6 +7823,7 @@ xhtmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
|
||||
int level, int format, const char *encoding) {
|
||||
int i;
|
||||
xmlNodePtr tmp;
|
||||
xmlChar *start, *end;
|
||||
|
||||
if (cur == NULL) {
|
||||
#ifdef DEBUG_TREE
|
||||
@ -7893,10 +7907,22 @@ xhtmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
|
||||
return;
|
||||
}
|
||||
if (cur->type == XML_CDATA_SECTION_NODE) {
|
||||
xmlOutputBufferWriteString(buf, "<![CDATA[");
|
||||
if (cur->content != NULL)
|
||||
xmlOutputBufferWriteString(buf, (const char *)cur->content);
|
||||
xmlOutputBufferWriteString(buf, "]]>");
|
||||
start = end = cur->content;
|
||||
while (*end != '\0') {
|
||||
if (*end == ']' && *(end + 1) == ']' && *(end + 2) == '>') {
|
||||
end = end + 2;
|
||||
xmlOutputBufferWriteString(buf, "<![CDATA[");
|
||||
xmlOutputBufferWrite(buf, end - start, (const char *)start);
|
||||
xmlOutputBufferWriteString(buf, "]]>");
|
||||
start = end;
|
||||
}
|
||||
end++;
|
||||
}
|
||||
if (start != end) {
|
||||
xmlOutputBufferWriteString(buf, "<![CDATA[");
|
||||
xmlOutputBufferWriteString(buf, (const char *)start);
|
||||
xmlOutputBufferWriteString(buf, "]]>");
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@ -7989,11 +8015,25 @@ xhtmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
|
||||
(xmlStrchr(child->content, '&') == NULL)) {
|
||||
xhtmlNodeDumpOutput(buf, doc, child, 0, 0, encoding);
|
||||
} else {
|
||||
xmlOutputBufferWriteString(buf, "<![CDATA[");
|
||||
if (child->content != NULL)
|
||||
xmlOutputBufferWriteString(buf,
|
||||
(const char *)child->content);
|
||||
xmlOutputBufferWriteString(buf, "]]>");
|
||||
start = end = child->content;
|
||||
while (*end != '\0') {
|
||||
if (*end == ']' &&
|
||||
*(end + 1) == ']' &&
|
||||
*(end + 2) == '>') {
|
||||
end = end + 2;
|
||||
xmlOutputBufferWriteString(buf, "<![CDATA[");
|
||||
xmlOutputBufferWrite(buf, end - start,
|
||||
(const char *)start);
|
||||
xmlOutputBufferWriteString(buf, "]]>");
|
||||
start = end;
|
||||
}
|
||||
end++;
|
||||
}
|
||||
if (start != end) {
|
||||
xmlOutputBufferWriteString(buf, "<![CDATA[");
|
||||
xmlOutputBufferWriteString(buf, (const char *)start);
|
||||
xmlOutputBufferWriteString(buf, "]]>");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
xhtmlNodeDumpOutput(buf, doc, child, 0, 0, encoding);
|
||||
|
Loading…
x
Reference in New Issue
Block a user