mirror of
https://github.com/GNOME/libxml2.git
synced 2025-02-11 18:09:31 +08:00
New set of cleanups, released 2.2.3:
- SAX.c debugXML.c parser.c parserInternals.c tree.c valid.c xpath.c: removed a few warnings in pedantic mode ... - parserInternals.c parser.c: moved encoding switching function to parserInternals.c - configure.in, doc/Makefile.am libxml.spec.in: released 2.2.3 Daniel
This commit is contained in:
parent
a2c6da94f8
commit
04698d9e1c
@ -1,3 +1,11 @@
|
||||
Sun Sep 17 17:58:37 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
|
||||
|
||||
* SAX.c debugXML.c parser.c parserInternals.c tree.c valid.c xpath.c:
|
||||
removed a few warnings in pedantic mode ...
|
||||
* parserInternals.c parser.c: moved encoding switching function
|
||||
to parserInternals.c
|
||||
* configure.in, doc/Makefile.am libxml.spec.in: released 2.2.3
|
||||
|
||||
Sat Sep 16 20:12:41 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
|
||||
|
||||
* HTMLparser.c parser.c: set ctxt->errNo before calling the
|
||||
|
12
SAX.c
12
SAX.c
@ -312,7 +312,7 @@ resolveEntity(void *ctx, const xmlChar *publicId, const xmlChar *systemId)
|
||||
{
|
||||
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
|
||||
xmlParserInputPtr ret;
|
||||
char *URI;
|
||||
xmlChar *URI;
|
||||
const char *base = NULL;
|
||||
|
||||
if (ctxt->input != NULL)
|
||||
@ -320,7 +320,7 @@ resolveEntity(void *ctx, const xmlChar *publicId, const xmlChar *systemId)
|
||||
if (base == NULL)
|
||||
base = ctxt->directory;
|
||||
|
||||
URI = xmlBuildURI(systemId, base);
|
||||
URI = xmlBuildURI(systemId, (const xmlChar *) base);
|
||||
|
||||
#ifdef DEBUG_SAX
|
||||
fprintf(stderr, "SAX.resolveEntity(%s, %s)\n", publicId, systemId);
|
||||
@ -423,7 +423,7 @@ entityDecl(void *ctx, const xmlChar *name, int type,
|
||||
ctxt->sax->warning(ctxt,
|
||||
"Entity(%s) already defined in the internal subset\n", name);
|
||||
if ((ent != NULL) && (ent->URI == NULL) && (systemId != NULL)) {
|
||||
char *URI;
|
||||
xmlChar *URI;
|
||||
const char *base = NULL;
|
||||
|
||||
if (ctxt->input != NULL)
|
||||
@ -431,7 +431,7 @@ entityDecl(void *ctx, const xmlChar *name, int type,
|
||||
if (base == NULL)
|
||||
base = ctxt->directory;
|
||||
|
||||
URI = xmlBuildURI(systemId, base);
|
||||
URI = xmlBuildURI(systemId, (const xmlChar *) base);
|
||||
ent->URI = URI;
|
||||
}
|
||||
} else if (ctxt->inSubset == 2) {
|
||||
@ -442,7 +442,7 @@ entityDecl(void *ctx, const xmlChar *name, int type,
|
||||
ctxt->sax->warning(ctxt,
|
||||
"Entity(%s) already defined in the external subset\n", name);
|
||||
if ((ent != NULL) && (ent->URI == NULL) && (systemId != NULL)) {
|
||||
char *URI;
|
||||
xmlChar *URI;
|
||||
const char *base = NULL;
|
||||
|
||||
if (ctxt->input != NULL)
|
||||
@ -450,7 +450,7 @@ entityDecl(void *ctx, const xmlChar *name, int type,
|
||||
if (base == NULL)
|
||||
base = ctxt->directory;
|
||||
|
||||
URI = xmlBuildURI(systemId, base);
|
||||
URI = xmlBuildURI(systemId, (const xmlChar *) base);
|
||||
ent->URI = URI;
|
||||
}
|
||||
} else {
|
||||
|
@ -27,6 +27,9 @@
|
||||
/* Define if you have the fpclass function. */
|
||||
#undef HAVE_FPCLASS
|
||||
|
||||
/* Define if you have the iconv function. */
|
||||
#undef HAVE_ICONV
|
||||
|
||||
/* Define if you have the isnand function. */
|
||||
#undef HAVE_ISNAND
|
||||
|
||||
@ -96,6 +99,9 @@
|
||||
/* Define if you have the <stdlib.h> header file. */
|
||||
#undef HAVE_STDLIB_H
|
||||
|
||||
/* Define if you have the <string.h> header file. */
|
||||
#undef HAVE_STRING_H
|
||||
|
||||
/* Define if you have the <sys/dir.h> header file. */
|
||||
#undef HAVE_SYS_DIR_H
|
||||
|
||||
@ -144,3 +150,6 @@
|
||||
/* Version number of package */
|
||||
#undef VERSION
|
||||
|
||||
/* Define if compiler has function prototypes */
|
||||
#undef PROTOTYPES
|
||||
|
||||
|
@ -5,7 +5,7 @@ AM_CONFIG_HEADER(config.h)
|
||||
|
||||
LIBXML_MAJOR_VERSION=2
|
||||
LIBXML_MINOR_VERSION=2
|
||||
LIBXML_MICRO_VERSION=2
|
||||
LIBXML_MICRO_VERSION=3
|
||||
LIBXML_VERSION=$LIBXML_MAJOR_VERSION.$LIBXML_MINOR_VERSION.$LIBXML_MICRO_VERSION
|
||||
LIBXML_VERSION_INFO=`expr $LIBXML_MAJOR_VERSION + $LIBXML_MINOR_VERSION`:$LIBXML_MICRO_VERSION:$LIBXML_MINOR_VERSION
|
||||
|
||||
@ -243,6 +243,7 @@ if test "$with_iconv" = "no" ; then
|
||||
echo Disabling ICONV support
|
||||
WITH_ICONV=0
|
||||
else
|
||||
AC_CHECK_FUNCS(iconv)
|
||||
if test "$have_iconv" != "" ; then
|
||||
echo Iconv support not found
|
||||
WITH_ICONV=0
|
||||
|
@ -917,6 +917,9 @@ static int xmlLsCountNode(xmlNodePtr node) {
|
||||
break;
|
||||
case XML_DOCUMENT_NODE:
|
||||
case XML_HTML_DOCUMENT_NODE:
|
||||
#ifdef LIBXML_SGML_ENABLED
|
||||
case XML_SGML_DOCUMENT_NODE:
|
||||
#endif
|
||||
list = ((xmlDocPtr) node)->children;
|
||||
break;
|
||||
case XML_ATTRIBUTE_NODE:
|
||||
|
@ -51,6 +51,6 @@ install-data-local:
|
||||
-(cd $(DESTDIR); gtkdoc-fixxref --module=$(DOC_MODULE) --html-dir=$(HTML_DIR))
|
||||
|
||||
dist-hook:
|
||||
(cd $(srcdir) ; tar cvf - xml.html FAQ.html encoding.html structure.gif DOM.gif html/*.html html/*.sgml) | (cd $(distdir); tar xf -)
|
||||
(cd $(srcdir) ; tar cvf - *.html *.gif html/*.html html/*.sgml) | (cd $(distdir); tar xf -)
|
||||
|
||||
.PHONY : html sgml templates scan
|
||||
|
@ -92,8 +92,8 @@ rm -rf $RPM_BUILD_ROOT
|
||||
%defattr(-, root, root)
|
||||
|
||||
%doc AUTHORS ChangeLog NEWS README COPYING COPYING.LIB TODO
|
||||
%doc /usr/man/man1/xmllint.1
|
||||
%doc /usr/man/man4/libxml.4
|
||||
%doc /usr/man/man1/xmllint.1*
|
||||
%doc /usr/man/man4/libxml.4*
|
||||
|
||||
%{prefix}/lib/lib*.so.*
|
||||
%{prefix}/bin/xmllint
|
||||
|
305
parser.c
305
parser.c
@ -899,311 +899,6 @@ xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
|
||||
}
|
||||
|
||||
|
||||
/************************************************************************
|
||||
* *
|
||||
* Commodity functions to handle encodings *
|
||||
* *
|
||||
************************************************************************/
|
||||
|
||||
/**
|
||||
* xmlSwitchEncoding:
|
||||
* @ctxt: the parser context
|
||||
* @enc: the encoding value (number)
|
||||
*
|
||||
* change the input functions when discovering the character encoding
|
||||
* of a given entity.
|
||||
*
|
||||
* Returns 0 in case of success, -1 otherwise
|
||||
*/
|
||||
int
|
||||
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
|
||||
{
|
||||
xmlCharEncodingHandlerPtr handler;
|
||||
|
||||
switch (enc) {
|
||||
case XML_CHAR_ENCODING_ERROR:
|
||||
ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData, "encoding unknown\n");
|
||||
ctxt->wellFormed = 0;
|
||||
ctxt->disableSAX = 1;
|
||||
break;
|
||||
case XML_CHAR_ENCODING_NONE:
|
||||
/* let's assume it's UTF-8 without the XML decl */
|
||||
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||
return(0);
|
||||
case XML_CHAR_ENCODING_UTF8:
|
||||
/* default encoding, no conversion should be needed */
|
||||
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||
return(0);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
handler = xmlGetCharEncodingHandler(enc);
|
||||
if (handler == NULL) {
|
||||
/*
|
||||
* Default handlers.
|
||||
*/
|
||||
switch (enc) {
|
||||
case XML_CHAR_ENCODING_ERROR:
|
||||
ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData, "encoding unknown\n");
|
||||
ctxt->wellFormed = 0;
|
||||
ctxt->disableSAX = 1;
|
||||
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||
break;
|
||||
case XML_CHAR_ENCODING_NONE:
|
||||
/* let's assume it's UTF-8 without the XML decl */
|
||||
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||
return(0);
|
||||
case XML_CHAR_ENCODING_UTF8:
|
||||
case XML_CHAR_ENCODING_ASCII:
|
||||
/* default encoding, no conversion should be needed */
|
||||
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||
return(0);
|
||||
case XML_CHAR_ENCODING_UTF16LE:
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UTF16BE:
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS4LE:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding USC4 little endian not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS4BE:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding USC4 big endian not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_EBCDIC:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding EBCDIC not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS4_2143:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding UCS4 2143 not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS4_3412:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding UCS4 3412 not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS2:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding UCS2 not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_8859_1:
|
||||
case XML_CHAR_ENCODING_8859_2:
|
||||
case XML_CHAR_ENCODING_8859_3:
|
||||
case XML_CHAR_ENCODING_8859_4:
|
||||
case XML_CHAR_ENCODING_8859_5:
|
||||
case XML_CHAR_ENCODING_8859_6:
|
||||
case XML_CHAR_ENCODING_8859_7:
|
||||
case XML_CHAR_ENCODING_8859_8:
|
||||
case XML_CHAR_ENCODING_8859_9:
|
||||
/*
|
||||
* We used to keep the internal content in the
|
||||
* document encoding however this turns being unmaintainable
|
||||
* So xmlGetCharEncodingHandler() will return non-null
|
||||
* values for this now.
|
||||
*/
|
||||
if ((ctxt->inputNr == 1) &&
|
||||
(ctxt->encoding == NULL) &&
|
||||
(ctxt->input->encoding != NULL)) {
|
||||
ctxt->encoding = xmlStrdup(ctxt->input->encoding);
|
||||
}
|
||||
ctxt->charset = enc;
|
||||
return(0);
|
||||
case XML_CHAR_ENCODING_2022_JP:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding ISO-2022-JPnot supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_SHIFT_JIS:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding Shift_JIS not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_EUC_JP:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding EUC-JPnot supported\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (handler == NULL)
|
||||
return(-1);
|
||||
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||
return(xmlSwitchToEncoding(ctxt, handler));
|
||||
}
|
||||
|
||||
/**
|
||||
* xmlSwitchToEncoding:
|
||||
* @ctxt: the parser context
|
||||
* @handler: the encoding handler
|
||||
*
|
||||
* change the input functions when discovering the character encoding
|
||||
* of a given entity.
|
||||
*
|
||||
* Returns 0 in case of success, -1 otherwise
|
||||
*/
|
||||
int
|
||||
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
|
||||
{
|
||||
int nbchars;
|
||||
|
||||
if (handler != NULL) {
|
||||
if (ctxt->input != NULL) {
|
||||
if (ctxt->input->buf != NULL) {
|
||||
if (ctxt->input->buf->encoder != NULL) {
|
||||
if (ctxt->input->buf->encoder == handler)
|
||||
return(0);
|
||||
/*
|
||||
* Note: this is a bit dangerous, but that's what it
|
||||
* takes to use nearly compatible signature for different
|
||||
* encodings.
|
||||
*/
|
||||
xmlCharEncCloseFunc(ctxt->input->buf->encoder);
|
||||
ctxt->input->buf->encoder = handler;
|
||||
return(0);
|
||||
}
|
||||
ctxt->input->buf->encoder = handler;
|
||||
|
||||
/*
|
||||
* Is there already some content down the pipe to convert ?
|
||||
*/
|
||||
if ((ctxt->input->buf->buffer != NULL) &&
|
||||
(ctxt->input->buf->buffer->use > 0)) {
|
||||
int processed;
|
||||
|
||||
/*
|
||||
* Specific handling of the Byte Order Mark for
|
||||
* UTF-16
|
||||
*/
|
||||
if ((handler->name != NULL) &&
|
||||
(!strcmp(handler->name, "UTF-16LE")) &&
|
||||
(ctxt->input->cur[0] == 0xFF) &&
|
||||
(ctxt->input->cur[1] == 0xFE)) {
|
||||
ctxt->input->cur += 2;
|
||||
}
|
||||
if ((handler->name != NULL) &&
|
||||
(!strcmp(handler->name, "UTF-16BE")) &&
|
||||
(ctxt->input->cur[0] == 0xFE) &&
|
||||
(ctxt->input->cur[1] == 0xFF)) {
|
||||
ctxt->input->cur += 2;
|
||||
}
|
||||
|
||||
/*
|
||||
* Shring the current input buffer.
|
||||
* Move it as the raw buffer and create a new input buffer
|
||||
*/
|
||||
processed = ctxt->input->cur - ctxt->input->base;
|
||||
xmlBufferShrink(ctxt->input->buf->buffer, processed);
|
||||
ctxt->input->buf->raw = ctxt->input->buf->buffer;
|
||||
ctxt->input->buf->buffer = xmlBufferCreate();
|
||||
|
||||
if (ctxt->html) {
|
||||
/*
|
||||
* converst as much as possbile of the buffer
|
||||
*/
|
||||
nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
|
||||
ctxt->input->buf->buffer,
|
||||
ctxt->input->buf->raw);
|
||||
} else {
|
||||
/*
|
||||
* convert just enough to get
|
||||
* '<?xml version="1.0" encoding="xxx"?>'
|
||||
* parsed with the autodetected encoding
|
||||
* into the parser reading buffer.
|
||||
*/
|
||||
nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
|
||||
ctxt->input->buf->buffer,
|
||||
ctxt->input->buf->raw);
|
||||
}
|
||||
if (nbchars < 0) {
|
||||
fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
|
||||
return(-1);
|
||||
}
|
||||
ctxt->input->base =
|
||||
ctxt->input->cur = ctxt->input->buf->buffer->content;
|
||||
|
||||
}
|
||||
return(0);
|
||||
} else {
|
||||
if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
|
||||
/*
|
||||
* When parsing a static memory array one must know the
|
||||
* size to be able to convert the buffer.
|
||||
*/
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"xmlSwitchEncoding : no input\n");
|
||||
return(-1);
|
||||
} else {
|
||||
int processed;
|
||||
|
||||
/*
|
||||
* Shring the current input buffer.
|
||||
* Move it as the raw buffer and create a new input buffer
|
||||
*/
|
||||
processed = ctxt->input->cur - ctxt->input->base;
|
||||
|
||||
ctxt->input->buf->raw = xmlBufferCreate();
|
||||
xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
|
||||
ctxt->input->length - processed);
|
||||
ctxt->input->buf->buffer = xmlBufferCreate();
|
||||
|
||||
/*
|
||||
* convert as much as possible of the raw input
|
||||
* to the parser reading buffer.
|
||||
*/
|
||||
nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
|
||||
ctxt->input->buf->buffer,
|
||||
ctxt->input->buf->raw);
|
||||
if (nbchars < 0) {
|
||||
fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
|
||||
return(-1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Conversion succeeded, get rid of the old buffer
|
||||
*/
|
||||
if ((ctxt->input->free != NULL) &&
|
||||
(ctxt->input->base != NULL))
|
||||
ctxt->input->free((xmlChar *) ctxt->input->base);
|
||||
ctxt->input->base =
|
||||
ctxt->input->cur = ctxt->input->buf->buffer->content;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"xmlSwitchEncoding : no input\n");
|
||||
return(-1);
|
||||
}
|
||||
/*
|
||||
* The parsing is now done in UTF8 natively
|
||||
*/
|
||||
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||
} else
|
||||
return(-1);
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
* *
|
||||
* Commodity functions to handle xmlChars *
|
||||
|
@ -1494,6 +1494,311 @@ xmlCopyChar(int len, xmlChar *out, int val) {
|
||||
return(1);
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
* *
|
||||
* Commodity functions to switch encodings *
|
||||
* *
|
||||
************************************************************************/
|
||||
|
||||
/**
|
||||
* xmlSwitchEncoding:
|
||||
* @ctxt: the parser context
|
||||
* @enc: the encoding value (number)
|
||||
*
|
||||
* change the input functions when discovering the character encoding
|
||||
* of a given entity.
|
||||
*
|
||||
* Returns 0 in case of success, -1 otherwise
|
||||
*/
|
||||
int
|
||||
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
|
||||
{
|
||||
xmlCharEncodingHandlerPtr handler;
|
||||
|
||||
switch (enc) {
|
||||
case XML_CHAR_ENCODING_ERROR:
|
||||
ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData, "encoding unknown\n");
|
||||
ctxt->wellFormed = 0;
|
||||
ctxt->disableSAX = 1;
|
||||
break;
|
||||
case XML_CHAR_ENCODING_NONE:
|
||||
/* let's assume it's UTF-8 without the XML decl */
|
||||
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||
return(0);
|
||||
case XML_CHAR_ENCODING_UTF8:
|
||||
/* default encoding, no conversion should be needed */
|
||||
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||
return(0);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
handler = xmlGetCharEncodingHandler(enc);
|
||||
if (handler == NULL) {
|
||||
/*
|
||||
* Default handlers.
|
||||
*/
|
||||
switch (enc) {
|
||||
case XML_CHAR_ENCODING_ERROR:
|
||||
ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData, "encoding unknown\n");
|
||||
ctxt->wellFormed = 0;
|
||||
ctxt->disableSAX = 1;
|
||||
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||
break;
|
||||
case XML_CHAR_ENCODING_NONE:
|
||||
/* let's assume it's UTF-8 without the XML decl */
|
||||
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||
return(0);
|
||||
case XML_CHAR_ENCODING_UTF8:
|
||||
case XML_CHAR_ENCODING_ASCII:
|
||||
/* default encoding, no conversion should be needed */
|
||||
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||
return(0);
|
||||
case XML_CHAR_ENCODING_UTF16LE:
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UTF16BE:
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS4LE:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding USC4 little endian not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS4BE:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding USC4 big endian not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_EBCDIC:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding EBCDIC not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS4_2143:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding UCS4 2143 not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS4_3412:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding UCS4 3412 not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS2:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding UCS2 not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_8859_1:
|
||||
case XML_CHAR_ENCODING_8859_2:
|
||||
case XML_CHAR_ENCODING_8859_3:
|
||||
case XML_CHAR_ENCODING_8859_4:
|
||||
case XML_CHAR_ENCODING_8859_5:
|
||||
case XML_CHAR_ENCODING_8859_6:
|
||||
case XML_CHAR_ENCODING_8859_7:
|
||||
case XML_CHAR_ENCODING_8859_8:
|
||||
case XML_CHAR_ENCODING_8859_9:
|
||||
/*
|
||||
* We used to keep the internal content in the
|
||||
* document encoding however this turns being unmaintainable
|
||||
* So xmlGetCharEncodingHandler() will return non-null
|
||||
* values for this now.
|
||||
*/
|
||||
if ((ctxt->inputNr == 1) &&
|
||||
(ctxt->encoding == NULL) &&
|
||||
(ctxt->input->encoding != NULL)) {
|
||||
ctxt->encoding = xmlStrdup(ctxt->input->encoding);
|
||||
}
|
||||
ctxt->charset = enc;
|
||||
return(0);
|
||||
case XML_CHAR_ENCODING_2022_JP:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding ISO-2022-JPnot supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_SHIFT_JIS:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding Shift_JIS not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_EUC_JP:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding EUC-JPnot supported\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (handler == NULL)
|
||||
return(-1);
|
||||
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||
return(xmlSwitchToEncoding(ctxt, handler));
|
||||
}
|
||||
|
||||
/**
|
||||
* xmlSwitchToEncoding:
|
||||
* @ctxt: the parser context
|
||||
* @handler: the encoding handler
|
||||
*
|
||||
* change the input functions when discovering the character encoding
|
||||
* of a given entity.
|
||||
*
|
||||
* Returns 0 in case of success, -1 otherwise
|
||||
*/
|
||||
int
|
||||
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
|
||||
{
|
||||
int nbchars;
|
||||
|
||||
if (handler != NULL) {
|
||||
if (ctxt->input != NULL) {
|
||||
if (ctxt->input->buf != NULL) {
|
||||
if (ctxt->input->buf->encoder != NULL) {
|
||||
if (ctxt->input->buf->encoder == handler)
|
||||
return(0);
|
||||
/*
|
||||
* Note: this is a bit dangerous, but that's what it
|
||||
* takes to use nearly compatible signature for different
|
||||
* encodings.
|
||||
*/
|
||||
xmlCharEncCloseFunc(ctxt->input->buf->encoder);
|
||||
ctxt->input->buf->encoder = handler;
|
||||
return(0);
|
||||
}
|
||||
ctxt->input->buf->encoder = handler;
|
||||
|
||||
/*
|
||||
* Is there already some content down the pipe to convert ?
|
||||
*/
|
||||
if ((ctxt->input->buf->buffer != NULL) &&
|
||||
(ctxt->input->buf->buffer->use > 0)) {
|
||||
int processed;
|
||||
|
||||
/*
|
||||
* Specific handling of the Byte Order Mark for
|
||||
* UTF-16
|
||||
*/
|
||||
if ((handler->name != NULL) &&
|
||||
(!strcmp(handler->name, "UTF-16LE")) &&
|
||||
(ctxt->input->cur[0] == 0xFF) &&
|
||||
(ctxt->input->cur[1] == 0xFE)) {
|
||||
ctxt->input->cur += 2;
|
||||
}
|
||||
if ((handler->name != NULL) &&
|
||||
(!strcmp(handler->name, "UTF-16BE")) &&
|
||||
(ctxt->input->cur[0] == 0xFE) &&
|
||||
(ctxt->input->cur[1] == 0xFF)) {
|
||||
ctxt->input->cur += 2;
|
||||
}
|
||||
|
||||
/*
|
||||
* Shring the current input buffer.
|
||||
* Move it as the raw buffer and create a new input buffer
|
||||
*/
|
||||
processed = ctxt->input->cur - ctxt->input->base;
|
||||
xmlBufferShrink(ctxt->input->buf->buffer, processed);
|
||||
ctxt->input->buf->raw = ctxt->input->buf->buffer;
|
||||
ctxt->input->buf->buffer = xmlBufferCreate();
|
||||
|
||||
if (ctxt->html) {
|
||||
/*
|
||||
* converst as much as possbile of the buffer
|
||||
*/
|
||||
nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
|
||||
ctxt->input->buf->buffer,
|
||||
ctxt->input->buf->raw);
|
||||
} else {
|
||||
/*
|
||||
* convert just enough to get
|
||||
* '<?xml version="1.0" encoding="xxx"?>'
|
||||
* parsed with the autodetected encoding
|
||||
* into the parser reading buffer.
|
||||
*/
|
||||
nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
|
||||
ctxt->input->buf->buffer,
|
||||
ctxt->input->buf->raw);
|
||||
}
|
||||
if (nbchars < 0) {
|
||||
fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
|
||||
return(-1);
|
||||
}
|
||||
ctxt->input->base =
|
||||
ctxt->input->cur = ctxt->input->buf->buffer->content;
|
||||
|
||||
}
|
||||
return(0);
|
||||
} else {
|
||||
if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
|
||||
/*
|
||||
* When parsing a static memory array one must know the
|
||||
* size to be able to convert the buffer.
|
||||
*/
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"xmlSwitchEncoding : no input\n");
|
||||
return(-1);
|
||||
} else {
|
||||
int processed;
|
||||
|
||||
/*
|
||||
* Shring the current input buffer.
|
||||
* Move it as the raw buffer and create a new input buffer
|
||||
*/
|
||||
processed = ctxt->input->cur - ctxt->input->base;
|
||||
|
||||
ctxt->input->buf->raw = xmlBufferCreate();
|
||||
xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
|
||||
ctxt->input->length - processed);
|
||||
ctxt->input->buf->buffer = xmlBufferCreate();
|
||||
|
||||
/*
|
||||
* convert as much as possible of the raw input
|
||||
* to the parser reading buffer.
|
||||
*/
|
||||
nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
|
||||
ctxt->input->buf->buffer,
|
||||
ctxt->input->buf->raw);
|
||||
if (nbchars < 0) {
|
||||
fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
|
||||
return(-1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Conversion succeeded, get rid of the old buffer
|
||||
*/
|
||||
if ((ctxt->input->free != NULL) &&
|
||||
(ctxt->input->base != NULL))
|
||||
ctxt->input->free((xmlChar *) ctxt->input->base);
|
||||
ctxt->input->base =
|
||||
ctxt->input->cur = ctxt->input->buf->buffer->content;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"xmlSwitchEncoding : no input\n");
|
||||
return(-1);
|
||||
}
|
||||
/*
|
||||
* The parsing is now done in UTF8 natively
|
||||
*/
|
||||
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||
} else
|
||||
return(-1);
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
* *
|
||||
* Commodity functions to handle entities processing *
|
||||
@ -1705,7 +2010,7 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
|
||||
return(NULL);
|
||||
|
||||
URI = xmlStrdup((xmlChar *) filename);
|
||||
directory = xmlParserGetDirectory(URI);
|
||||
directory = xmlParserGetDirectory((const char *) URI);
|
||||
|
||||
inputStream = xmlNewInputStream(ctxt);
|
||||
if (inputStream == NULL) {
|
||||
@ -1714,7 +2019,7 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
|
||||
return(NULL);
|
||||
}
|
||||
|
||||
inputStream->filename = URI;
|
||||
inputStream->filename = (const char *) URI;
|
||||
inputStream->directory = directory;
|
||||
inputStream->buf = buf;
|
||||
|
||||
|
18
tree.c
18
tree.c
@ -2637,6 +2637,9 @@ xmlNodeSetLang(xmlNodePtr cur, const xmlChar *lang) {
|
||||
case XML_PI_NODE:
|
||||
case XML_ENTITY_REF_NODE:
|
||||
case XML_ENTITY_NODE:
|
||||
#ifdef LIBXML_SGML_ENABLED
|
||||
case XML_SGML_DOCUMENT_NODE:
|
||||
#endif
|
||||
return;
|
||||
case XML_ELEMENT_NODE:
|
||||
case XML_ATTRIBUTE_NODE:
|
||||
@ -2719,6 +2722,9 @@ xmlNodeSetName(xmlNodePtr cur, const xmlChar *name) {
|
||||
case XML_DOCUMENT_FRAG_NODE:
|
||||
case XML_NOTATION_NODE:
|
||||
case XML_HTML_DOCUMENT_NODE:
|
||||
#ifdef LIBXML_SGML_ENABLED
|
||||
case XML_SGML_DOCUMENT_NODE:
|
||||
#endif
|
||||
return;
|
||||
case XML_ELEMENT_NODE:
|
||||
case XML_ATTRIBUTE_NODE:
|
||||
@ -2845,6 +2851,9 @@ xmlNodeGetContent(xmlNodePtr cur) {
|
||||
case XML_DOCUMENT_TYPE_NODE:
|
||||
case XML_NOTATION_NODE:
|
||||
case XML_DTD_NODE:
|
||||
#ifdef LIBXML_SGML_ENABLED
|
||||
case XML_SGML_DOCUMENT_NODE:
|
||||
#endif
|
||||
return(NULL);
|
||||
case XML_ELEMENT_DECL:
|
||||
/* TODO !!! */
|
||||
@ -2930,6 +2939,9 @@ xmlNodeSetContent(xmlNodePtr cur, const xmlChar *content) {
|
||||
case XML_DOCUMENT_NODE:
|
||||
case XML_HTML_DOCUMENT_NODE:
|
||||
case XML_DOCUMENT_TYPE_NODE:
|
||||
#ifdef LIBXML_SGML_ENABLED
|
||||
case XML_SGML_DOCUMENT_NODE:
|
||||
#endif
|
||||
break;
|
||||
case XML_NOTATION_NODE:
|
||||
break;
|
||||
@ -3012,6 +3024,9 @@ xmlNodeSetContentLen(xmlNodePtr cur, const xmlChar *content, int len) {
|
||||
case XML_DTD_NODE:
|
||||
case XML_HTML_DOCUMENT_NODE:
|
||||
case XML_DOCUMENT_TYPE_NODE:
|
||||
#ifdef LIBXML_SGML_ENABLED
|
||||
case XML_SGML_DOCUMENT_NODE:
|
||||
#endif
|
||||
break;
|
||||
case XML_ELEMENT_DECL:
|
||||
/* TODO !!! */
|
||||
@ -3096,6 +3111,9 @@ xmlNodeAddContentLen(xmlNodePtr cur, const xmlChar *content, int len) {
|
||||
case XML_DTD_NODE:
|
||||
case XML_HTML_DOCUMENT_NODE:
|
||||
case XML_DOCUMENT_TYPE_NODE:
|
||||
#ifdef LIBXML_SGML_ENABLED
|
||||
case XML_SGML_DOCUMENT_NODE:
|
||||
#endif
|
||||
break;
|
||||
case XML_ELEMENT_DECL:
|
||||
case XML_ATTRIBUTE_DECL:
|
||||
|
3
valid.c
3
valid.c
@ -3561,6 +3561,9 @@ xmlSprintfElementChilds(char *buf, xmlNodePtr node, int glob) {
|
||||
break;
|
||||
case XML_ATTRIBUTE_NODE:
|
||||
case XML_DOCUMENT_NODE:
|
||||
#ifdef LIBXML_SGML_ENABLED
|
||||
case XML_SGML_DOCUMENT_NODE:
|
||||
#endif
|
||||
case XML_HTML_DOCUMENT_NODE:
|
||||
case XML_DOCUMENT_TYPE_NODE:
|
||||
case XML_DOCUMENT_FRAG_NODE:
|
||||
|
12
xpath.c
12
xpath.c
@ -1504,6 +1504,9 @@ xmlXPathNextChild(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
|
||||
case XML_DOCUMENT_TYPE_NODE:
|
||||
case XML_DOCUMENT_FRAG_NODE:
|
||||
case XML_HTML_DOCUMENT_NODE:
|
||||
#ifdef LIBXML_SGML_ENABLED
|
||||
case XML_SGML_DOCUMENT_NODE:
|
||||
#endif
|
||||
return(((xmlDocPtr) ctxt->context->node)->children);
|
||||
case XML_ELEMENT_DECL:
|
||||
case XML_ATTRIBUTE_DECL:
|
||||
@ -1632,6 +1635,9 @@ xmlXPathNextParent(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
|
||||
case XML_DOCUMENT_TYPE_NODE:
|
||||
case XML_DOCUMENT_FRAG_NODE:
|
||||
case XML_HTML_DOCUMENT_NODE:
|
||||
#ifdef LIBXML_SGML_ENABLED
|
||||
case XML_SGML_DOCUMENT_NODE:
|
||||
#endif
|
||||
return(NULL);
|
||||
}
|
||||
}
|
||||
@ -1686,6 +1692,9 @@ xmlXPathNextAncestor(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
|
||||
case XML_DOCUMENT_TYPE_NODE:
|
||||
case XML_DOCUMENT_FRAG_NODE:
|
||||
case XML_HTML_DOCUMENT_NODE:
|
||||
#ifdef LIBXML_SGML_ENABLED
|
||||
case XML_SGML_DOCUMENT_NODE:
|
||||
#endif
|
||||
return(NULL);
|
||||
}
|
||||
return(NULL);
|
||||
@ -1717,6 +1726,9 @@ xmlXPathNextAncestor(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
|
||||
case XML_DOCUMENT_TYPE_NODE:
|
||||
case XML_DOCUMENT_FRAG_NODE:
|
||||
case XML_HTML_DOCUMENT_NODE:
|
||||
#ifdef LIBXML_SGML_ENABLED
|
||||
case XML_SGML_DOCUMENT_NODE:
|
||||
#endif
|
||||
return(NULL);
|
||||
}
|
||||
return(NULL);
|
||||
|
Loading…
Reference in New Issue
Block a user