New set of cleanups, released 2.2.3:

- SAX.c debugXML.c parser.c parserInternals.c tree.c valid.c xpath.c: removed a few warnings in pedantic mode ... - parserInternals.c parser.c: moved encoding switching function to parserInternals.c - configure.in, doc/Makefile.am libxml.spec.in: released 2.2.3 Daniel
2025-03-25 19:01:31 +08:00 · 2000-09-17 16:00:22 +00:00 · 2000-09-17 16:00:22 +00:00 · 04698d9e1c
commit 04698d9e1c
parent a2c6da94f8
12 changed files with 371 additions and 317 deletions
--- a/8
+++ b/8
@ -1,3 +1,11 @@
+Sun Sep 17 17:58:37 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
+
+	* SAX.c debugXML.c parser.c parserInternals.c tree.c valid.c xpath.c:
+	  removed a few warnings in pedantic mode ...
+	* parserInternals.c parser.c: moved encoding switching function
+	  to parserInternals.c
+	* configure.in, doc/Makefile.am libxml.spec.in: released 2.2.3
+
 Sat Sep 16 20:12:41 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>

 	* HTMLparser.c parser.c: set ctxt->errNo before calling the
--- a/SAX.c
+++ b/SAX.c
@ -312,7 +312,7 @@ resolveEntity(void *ctx, const xmlChar *publicId, const xmlChar *systemId)
 {
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
    xmlParserInputPtr ret;
-    char *URI;
+    xmlChar *URI;
    const char *base = NULL;

    if (ctxt->input != NULL)
@ -320,7 +320,7 @@ resolveEntity(void *ctx, const xmlChar *publicId, const xmlChar *systemId)
    if (base == NULL)
 	base = ctxt->directory;

-    URI = xmlBuildURI(systemId, base);
+    URI = xmlBuildURI(systemId, (const xmlChar *) base);

 #ifdef DEBUG_SAX
    fprintf(stderr, "SAX.resolveEntity(%s, %s)\n", publicId, systemId);
@ -423,7 +423,7 @@ entityDecl(void *ctx, const xmlChar *name, int type,
 	    ctxt->sax->warning(ctxt, 
 	     "Entity(%s) already defined in the internal subset\n", name);
 	if ((ent != NULL) && (ent->URI == NULL) && (systemId != NULL)) {
-	    char *URI;
+	    xmlChar *URI;
 	    const char *base = NULL;

 	    if (ctxt->input != NULL)
@ -431,7 +431,7 @@ entityDecl(void *ctx, const xmlChar *name, int type,
 	    if (base == NULL)
 		base = ctxt->directory;
 	
-	    URI = xmlBuildURI(systemId, base);
+	    URI = xmlBuildURI(systemId, (const xmlChar *) base);
 	    ent->URI = URI;
 	}
    } else if (ctxt->inSubset == 2) {
@ -442,7 +442,7 @@ entityDecl(void *ctx, const xmlChar *name, int type,
 	    ctxt->sax->warning(ctxt, 
 	     "Entity(%s) already defined in the external subset\n", name);
 	if ((ent != NULL) && (ent->URI == NULL) && (systemId != NULL)) {
-	    char *URI;
+	    xmlChar *URI;
 	    const char *base = NULL;

 	    if (ctxt->input != NULL)
@ -450,7 +450,7 @@ entityDecl(void *ctx, const xmlChar *name, int type,
 	    if (base == NULL)
 		base = ctxt->directory;
 	
-	    URI = xmlBuildURI(systemId, base);
+	    URI = xmlBuildURI(systemId, (const xmlChar *) base);
 	    ent->URI = URI;
 	}
    } else {
--- a/config.h.in
+++ b/config.h.in
@ -27,6 +27,9 @@
 /* Define if you have the fpclass function.  */
 #undef HAVE_FPCLASS

+/* Define if you have the iconv function.  */
+#undef HAVE_ICONV
+
 /* Define if you have the isnand function.  */
 #undef HAVE_ISNAND

@ -96,6 +99,9 @@
 /* Define if you have the <stdlib.h> header file.  */
 #undef HAVE_STDLIB_H

+/* Define if you have the <string.h> header file.  */
+#undef HAVE_STRING_H
+
 /* Define if you have the <sys/dir.h> header file.  */
 #undef HAVE_SYS_DIR_H

@ -144,3 +150,6 @@
 /* Version number of package */
 #undef VERSION

+/* Define if compiler has function prototypes */
+#undef PROTOTYPES
+
--- a/configure.in
+++ b/configure.in
@ -5,7 +5,7 @@ AM_CONFIG_HEADER(config.h)

 LIBXML_MAJOR_VERSION=2
 LIBXML_MINOR_VERSION=2
-LIBXML_MICRO_VERSION=2
+LIBXML_MICRO_VERSION=3
 LIBXML_VERSION=$LIBXML_MAJOR_VERSION.$LIBXML_MINOR_VERSION.$LIBXML_MICRO_VERSION
 LIBXML_VERSION_INFO=`expr $LIBXML_MAJOR_VERSION + $LIBXML_MINOR_VERSION`:$LIBXML_MICRO_VERSION:$LIBXML_MINOR_VERSION

@ -243,6 +243,7 @@ if test "$with_iconv" = "no" ; then
    echo Disabling ICONV support
    WITH_ICONV=0
 else    
+    AC_CHECK_FUNCS(iconv)
    if test "$have_iconv" != "" ; then
        echo Iconv support not found
        WITH_ICONV=0
--- a/debugXML.c
+++ b/debugXML.c
@ -917,6 +917,9 @@ static int xmlLsCountNode(xmlNodePtr node) {
 	    break;
 	case XML_DOCUMENT_NODE:
 	case XML_HTML_DOCUMENT_NODE:
+#ifdef LIBXML_SGML_ENABLED
+	case XML_SGML_DOCUMENT_NODE:
+#endif
 	    list = ((xmlDocPtr) node)->children;
 	    break;
 	case XML_ATTRIBUTE_NODE:
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@ -51,6 +51,6 @@ install-data-local:
 	-(cd $(DESTDIR); gtkdoc-fixxref --module=$(DOC_MODULE) --html-dir=$(HTML_DIR))

 dist-hook:
-	(cd $(srcdir) ; tar cvf - xml.html FAQ.html encoding.html structure.gif DOM.gif html/*.html html/*.sgml) | (cd $(distdir); tar xf -)
+	(cd $(srcdir) ; tar cvf - *.html *.gif html/*.html html/*.sgml) | (cd $(distdir); tar xf -)

 .PHONY : html sgml templates scan
--- a/libxml.spec.in
+++ b/libxml.spec.in
@ -92,8 +92,8 @@ rm -rf $RPM_BUILD_ROOT
 %defattr(-, root, root)

 %doc AUTHORS ChangeLog NEWS README COPYING COPYING.LIB TODO
-%doc /usr/man/man1/xmllint.1
-%doc /usr/man/man4/libxml.4
+%doc /usr/man/man1/xmllint.1*
+%doc /usr/man/man4/libxml.4*

 %{prefix}/lib/lib*.so.*
 %{prefix}/bin/xmllint
--- a/parser.c
+++ b/parser.c
@ -899,311 +899,6 @@ xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
 }


-/************************************************************************
- *									*
- *		Commodity functions to handle encodings			*
- *									*
- ************************************************************************/
-
-/**
- * xmlSwitchEncoding:
- * @ctxt:  the parser context
- * @enc:  the encoding value (number)
- *
- * change the input functions when discovering the character encoding
- * of a given entity.
- *
- * Returns 0 in case of success, -1 otherwise
- */
-int
-xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
-{
-    xmlCharEncodingHandlerPtr handler;
-
-    switch (enc) {
-	case XML_CHAR_ENCODING_ERROR:
-	    ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		ctxt->sax->error(ctxt->userData, "encoding unknown\n");
-	    ctxt->wellFormed = 0;
-	    ctxt->disableSAX = 1;
-	    break;
-	case XML_CHAR_ENCODING_NONE:
-	    /* let's assume it's UTF-8 without the XML decl */
-	    ctxt->charset = XML_CHAR_ENCODING_UTF8;
-	    return(0);
-	case XML_CHAR_ENCODING_UTF8:
-	    /* default encoding, no conversion should be needed */
-	    ctxt->charset = XML_CHAR_ENCODING_UTF8;
-	    return(0);
-	default:
-	    break;
-    }
-    handler = xmlGetCharEncodingHandler(enc);
-    if (handler == NULL) {
-	/*
-	 * Default handlers.
-	 */
-	switch (enc) {
-	    case XML_CHAR_ENCODING_ERROR:
-		ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
-		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		    ctxt->sax->error(ctxt->userData, "encoding unknown\n");
-		ctxt->wellFormed = 0;
-		ctxt->disableSAX = 1;
-		ctxt->charset = XML_CHAR_ENCODING_UTF8;
-		break;
-	    case XML_CHAR_ENCODING_NONE:
-		/* let's assume it's UTF-8 without the XML decl */
-		ctxt->charset = XML_CHAR_ENCODING_UTF8;
-		return(0);
-	    case XML_CHAR_ENCODING_UTF8:
-	    case XML_CHAR_ENCODING_ASCII:
-		/* default encoding, no conversion should be needed */
-		ctxt->charset = XML_CHAR_ENCODING_UTF8;
-		return(0);
-	    case XML_CHAR_ENCODING_UTF16LE:
-		break;
-	    case XML_CHAR_ENCODING_UTF16BE:
-		break;
-	    case XML_CHAR_ENCODING_UCS4LE:
-		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		    ctxt->sax->error(ctxt->userData,
-		      "char encoding USC4 little endian not supported\n");
-		break;
-	    case XML_CHAR_ENCODING_UCS4BE:
-		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		    ctxt->sax->error(ctxt->userData,
-		      "char encoding USC4 big endian not supported\n");
-		break;
-	    case XML_CHAR_ENCODING_EBCDIC:
-		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		    ctxt->sax->error(ctxt->userData,
-		      "char encoding EBCDIC not supported\n");
-		break;
-	    case XML_CHAR_ENCODING_UCS4_2143:
-		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		    ctxt->sax->error(ctxt->userData,
-		      "char encoding UCS4 2143 not supported\n");
-		break;
-	    case XML_CHAR_ENCODING_UCS4_3412:
-		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		    ctxt->sax->error(ctxt->userData,
-		      "char encoding UCS4 3412 not supported\n");
-		break;
-	    case XML_CHAR_ENCODING_UCS2:
-		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		    ctxt->sax->error(ctxt->userData,
-		      "char encoding UCS2 not supported\n");
-		break;
-	    case XML_CHAR_ENCODING_8859_1:
-	    case XML_CHAR_ENCODING_8859_2:
-	    case XML_CHAR_ENCODING_8859_3:
-	    case XML_CHAR_ENCODING_8859_4:
-	    case XML_CHAR_ENCODING_8859_5:
-	    case XML_CHAR_ENCODING_8859_6:
-	    case XML_CHAR_ENCODING_8859_7:
-	    case XML_CHAR_ENCODING_8859_8:
-	    case XML_CHAR_ENCODING_8859_9:
-		/*
-		 * We used to keep the internal content in the
-		 * document encoding however this turns being unmaintainable
-		 * So xmlGetCharEncodingHandler() will return non-null
-		 * values for this now.
-		 */
-		if ((ctxt->inputNr == 1) &&
-		    (ctxt->encoding == NULL) &&
-		    (ctxt->input->encoding != NULL)) {
-		    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
-		}
-		ctxt->charset = enc;
-		return(0);
-	    case XML_CHAR_ENCODING_2022_JP:
-		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		    ctxt->sax->error(ctxt->userData,
-		      "char encoding ISO-2022-JPnot supported\n");
-		break;
-	    case XML_CHAR_ENCODING_SHIFT_JIS:
-		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		    ctxt->sax->error(ctxt->userData,
-		      "char encoding Shift_JIS not supported\n");
-		break;
-	    case XML_CHAR_ENCODING_EUC_JP:
-		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		    ctxt->sax->error(ctxt->userData,
-		      "char encoding EUC-JPnot supported\n");
-		break;
-	}
-    }
-    if (handler == NULL)
-	return(-1);
-    ctxt->charset = XML_CHAR_ENCODING_UTF8;
-    return(xmlSwitchToEncoding(ctxt, handler));
-}
-
-/**
- * xmlSwitchToEncoding:
- * @ctxt:  the parser context
- * @handler:  the encoding handler
- *
- * change the input functions when discovering the character encoding
- * of a given entity.
- *
- * Returns 0 in case of success, -1 otherwise
- */
-int
-xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 
-{
-    int nbchars;
-
-    if (handler != NULL) {
-        if (ctxt->input != NULL) {
-	    if (ctxt->input->buf != NULL) {
-	        if (ctxt->input->buf->encoder != NULL) {
-		    if (ctxt->input->buf->encoder == handler)
-			return(0);
-		    /*
-		     * Note: this is a bit dangerous, but that's what it
-		     * takes to use nearly compatible signature for different
-		     * encodings.
-		     */
-		    xmlCharEncCloseFunc(ctxt->input->buf->encoder);
-		    ctxt->input->buf->encoder = handler;
-		    return(0);
-		}
-		ctxt->input->buf->encoder = handler;
-
-	        /*
-		 * Is there already some content down the pipe to convert ?
-		 */
-	        if ((ctxt->input->buf->buffer != NULL) &&
-		    (ctxt->input->buf->buffer->use > 0)) {
-		    int processed;
-
-		    /*
-		     * Specific handling of the Byte Order Mark for 
-		     * UTF-16
-		     */
-		    if ((handler->name != NULL) &&
-			(!strcmp(handler->name, "UTF-16LE")) && 
-		        (ctxt->input->cur[0] == 0xFF) &&
-		        (ctxt->input->cur[1] == 0xFE)) {
-			ctxt->input->cur += 2;
-		    }
-		    if ((handler->name != NULL) &&
-			(!strcmp(handler->name, "UTF-16BE")) && 
-		        (ctxt->input->cur[0] == 0xFE) &&
-		        (ctxt->input->cur[1] == 0xFF)) {
-			ctxt->input->cur += 2;
-		    }
-
-		    /*
-		     * Shring the current input buffer.
-		     * Move it as the raw buffer and create a new input buffer
-		     */
-		    processed = ctxt->input->cur - ctxt->input->base;
-		    xmlBufferShrink(ctxt->input->buf->buffer, processed);
-		    ctxt->input->buf->raw = ctxt->input->buf->buffer;
-		    ctxt->input->buf->buffer = xmlBufferCreate();
-
-		    if (ctxt->html) {
-			/*
-			 * converst as much as possbile of the buffer
-			 */
-			nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
-				                   ctxt->input->buf->buffer,
-						   ctxt->input->buf->raw);
-		    } else {
-			/*
-			 * convert just enough to get
-			 * '<?xml version="1.0" encoding="xxx"?>'
-			 * parsed with the autodetected encoding
-			 * into the parser reading buffer.
-			 */
-			nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
-						      ctxt->input->buf->buffer,
-						      ctxt->input->buf->raw);
-		    }
-		    if (nbchars < 0) {
-			fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
-			return(-1);
-		    }
-		    ctxt->input->base =
-		    ctxt->input->cur = ctxt->input->buf->buffer->content;
-
-		}
-		return(0);
-	    } else {
-	        if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
-		    /*
-		     * When parsing a static memory array one must know the
-		     * size to be able to convert the buffer.
-		     */
-		    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-			ctxt->sax->error(ctxt->userData,
-					 "xmlSwitchEncoding : no input\n");
-		    return(-1);
-		} else {
-		    int processed;
-
-		    /*
-		     * Shring the current input buffer.
-		     * Move it as the raw buffer and create a new input buffer
-		     */
-		    processed = ctxt->input->cur - ctxt->input->base;
-
-		    ctxt->input->buf->raw = xmlBufferCreate();
-		    xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
-				 ctxt->input->length - processed);
-		    ctxt->input->buf->buffer = xmlBufferCreate();
-
-		    /*
-		     * convert as much as possible of the raw input
-		     * to the parser reading buffer.
-		     */
-		    nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
-		                               ctxt->input->buf->buffer,
-					       ctxt->input->buf->raw);
-		    if (nbchars < 0) {
-			fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
-			return(-1);
-		    }
-
-		    /*
-		     * Conversion succeeded, get rid of the old buffer
-		     */
-		    if ((ctxt->input->free != NULL) &&
-		        (ctxt->input->base != NULL))
-			ctxt->input->free((xmlChar *) ctxt->input->base);
-		    ctxt->input->base =
-		    ctxt->input->cur = ctxt->input->buf->buffer->content;
-		}
-	    }
-	} else {
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-	        ctxt->sax->error(ctxt->userData,
-		                 "xmlSwitchEncoding : no input\n");
-	    return(-1);
-	}
-	/*
-	 * The parsing is now done in UTF8 natively
-	 */
-	ctxt->charset = XML_CHAR_ENCODING_UTF8;
-    } else 
-	return(-1);
-    return(0);
-
-}
-
 /************************************************************************
 *									*
 *		Commodity functions to handle xmlChars			*
--- a/parserInternals.c
+++ b/parserInternals.c
@ -1494,6 +1494,311 @@ xmlCopyChar(int len, xmlChar *out, int val) {
    return(1);
 }

+/************************************************************************
+ *									*
+ *		Commodity functions to switch encodings			*
+ *									*
+ ************************************************************************/
+
+/**
+ * xmlSwitchEncoding:
+ * @ctxt:  the parser context
+ * @enc:  the encoding value (number)
+ *
+ * change the input functions when discovering the character encoding
+ * of a given entity.
+ *
+ * Returns 0 in case of success, -1 otherwise
+ */
+int
+xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
+{
+    xmlCharEncodingHandlerPtr handler;
+
+    switch (enc) {
+	case XML_CHAR_ENCODING_ERROR:
+	    ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
+	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		ctxt->sax->error(ctxt->userData, "encoding unknown\n");
+	    ctxt->wellFormed = 0;
+	    ctxt->disableSAX = 1;
+	    break;
+	case XML_CHAR_ENCODING_NONE:
+	    /* let's assume it's UTF-8 without the XML decl */
+	    ctxt->charset = XML_CHAR_ENCODING_UTF8;
+	    return(0);
+	case XML_CHAR_ENCODING_UTF8:
+	    /* default encoding, no conversion should be needed */
+	    ctxt->charset = XML_CHAR_ENCODING_UTF8;
+	    return(0);
+	default:
+	    break;
+    }
+    handler = xmlGetCharEncodingHandler(enc);
+    if (handler == NULL) {
+	/*
+	 * Default handlers.
+	 */
+	switch (enc) {
+	    case XML_CHAR_ENCODING_ERROR:
+		ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData, "encoding unknown\n");
+		ctxt->wellFormed = 0;
+		ctxt->disableSAX = 1;
+		ctxt->charset = XML_CHAR_ENCODING_UTF8;
+		break;
+	    case XML_CHAR_ENCODING_NONE:
+		/* let's assume it's UTF-8 without the XML decl */
+		ctxt->charset = XML_CHAR_ENCODING_UTF8;
+		return(0);
+	    case XML_CHAR_ENCODING_UTF8:
+	    case XML_CHAR_ENCODING_ASCII:
+		/* default encoding, no conversion should be needed */
+		ctxt->charset = XML_CHAR_ENCODING_UTF8;
+		return(0);
+	    case XML_CHAR_ENCODING_UTF16LE:
+		break;
+	    case XML_CHAR_ENCODING_UTF16BE:
+		break;
+	    case XML_CHAR_ENCODING_UCS4LE:
+		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+		      "char encoding USC4 little endian not supported\n");
+		break;
+	    case XML_CHAR_ENCODING_UCS4BE:
+		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+		      "char encoding USC4 big endian not supported\n");
+		break;
+	    case XML_CHAR_ENCODING_EBCDIC:
+		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+		      "char encoding EBCDIC not supported\n");
+		break;
+	    case XML_CHAR_ENCODING_UCS4_2143:
+		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+		      "char encoding UCS4 2143 not supported\n");
+		break;
+	    case XML_CHAR_ENCODING_UCS4_3412:
+		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+		      "char encoding UCS4 3412 not supported\n");
+		break;
+	    case XML_CHAR_ENCODING_UCS2:
+		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+		      "char encoding UCS2 not supported\n");
+		break;
+	    case XML_CHAR_ENCODING_8859_1:
+	    case XML_CHAR_ENCODING_8859_2:
+	    case XML_CHAR_ENCODING_8859_3:
+	    case XML_CHAR_ENCODING_8859_4:
+	    case XML_CHAR_ENCODING_8859_5:
+	    case XML_CHAR_ENCODING_8859_6:
+	    case XML_CHAR_ENCODING_8859_7:
+	    case XML_CHAR_ENCODING_8859_8:
+	    case XML_CHAR_ENCODING_8859_9:
+		/*
+		 * We used to keep the internal content in the
+		 * document encoding however this turns being unmaintainable
+		 * So xmlGetCharEncodingHandler() will return non-null
+		 * values for this now.
+		 */
+		if ((ctxt->inputNr == 1) &&
+		    (ctxt->encoding == NULL) &&
+		    (ctxt->input->encoding != NULL)) {
+		    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
+		}
+		ctxt->charset = enc;
+		return(0);
+	    case XML_CHAR_ENCODING_2022_JP:
+		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+		      "char encoding ISO-2022-JPnot supported\n");
+		break;
+	    case XML_CHAR_ENCODING_SHIFT_JIS:
+		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+		      "char encoding Shift_JIS not supported\n");
+		break;
+	    case XML_CHAR_ENCODING_EUC_JP:
+		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+		      "char encoding EUC-JPnot supported\n");
+		break;
+	}
+    }
+    if (handler == NULL)
+	return(-1);
+    ctxt->charset = XML_CHAR_ENCODING_UTF8;
+    return(xmlSwitchToEncoding(ctxt, handler));
+}
+
+/**
+ * xmlSwitchToEncoding:
+ * @ctxt:  the parser context
+ * @handler:  the encoding handler
+ *
+ * change the input functions when discovering the character encoding
+ * of a given entity.
+ *
+ * Returns 0 in case of success, -1 otherwise
+ */
+int
+xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 
+{
+    int nbchars;
+
+    if (handler != NULL) {
+        if (ctxt->input != NULL) {
+	    if (ctxt->input->buf != NULL) {
+	        if (ctxt->input->buf->encoder != NULL) {
+		    if (ctxt->input->buf->encoder == handler)
+			return(0);
+		    /*
+		     * Note: this is a bit dangerous, but that's what it
+		     * takes to use nearly compatible signature for different
+		     * encodings.
+		     */
+		    xmlCharEncCloseFunc(ctxt->input->buf->encoder);
+		    ctxt->input->buf->encoder = handler;
+		    return(0);
+		}
+		ctxt->input->buf->encoder = handler;
+
+	        /*
+		 * Is there already some content down the pipe to convert ?
+		 */
+	        if ((ctxt->input->buf->buffer != NULL) &&
+		    (ctxt->input->buf->buffer->use > 0)) {
+		    int processed;
+
+		    /*
+		     * Specific handling of the Byte Order Mark for 
+		     * UTF-16
+		     */
+		    if ((handler->name != NULL) &&
+			(!strcmp(handler->name, "UTF-16LE")) && 
+		        (ctxt->input->cur[0] == 0xFF) &&
+		        (ctxt->input->cur[1] == 0xFE)) {
+			ctxt->input->cur += 2;
+		    }
+		    if ((handler->name != NULL) &&
+			(!strcmp(handler->name, "UTF-16BE")) && 
+		        (ctxt->input->cur[0] == 0xFE) &&
+		        (ctxt->input->cur[1] == 0xFF)) {
+			ctxt->input->cur += 2;
+		    }
+
+		    /*
+		     * Shring the current input buffer.
+		     * Move it as the raw buffer and create a new input buffer
+		     */
+		    processed = ctxt->input->cur - ctxt->input->base;
+		    xmlBufferShrink(ctxt->input->buf->buffer, processed);
+		    ctxt->input->buf->raw = ctxt->input->buf->buffer;
+		    ctxt->input->buf->buffer = xmlBufferCreate();
+
+		    if (ctxt->html) {
+			/*
+			 * converst as much as possbile of the buffer
+			 */
+			nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
+				                   ctxt->input->buf->buffer,
+						   ctxt->input->buf->raw);
+		    } else {
+			/*
+			 * convert just enough to get
+			 * '<?xml version="1.0" encoding="xxx"?>'
+			 * parsed with the autodetected encoding
+			 * into the parser reading buffer.
+			 */
+			nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
+						      ctxt->input->buf->buffer,
+						      ctxt->input->buf->raw);
+		    }
+		    if (nbchars < 0) {
+			fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
+			return(-1);
+		    }
+		    ctxt->input->base =
+		    ctxt->input->cur = ctxt->input->buf->buffer->content;
+
+		}
+		return(0);
+	    } else {
+	        if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
+		    /*
+		     * When parsing a static memory array one must know the
+		     * size to be able to convert the buffer.
+		     */
+		    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+			ctxt->sax->error(ctxt->userData,
+					 "xmlSwitchEncoding : no input\n");
+		    return(-1);
+		} else {
+		    int processed;
+
+		    /*
+		     * Shring the current input buffer.
+		     * Move it as the raw buffer and create a new input buffer
+		     */
+		    processed = ctxt->input->cur - ctxt->input->base;
+
+		    ctxt->input->buf->raw = xmlBufferCreate();
+		    xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
+				 ctxt->input->length - processed);
+		    ctxt->input->buf->buffer = xmlBufferCreate();
+
+		    /*
+		     * convert as much as possible of the raw input
+		     * to the parser reading buffer.
+		     */
+		    nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
+		                               ctxt->input->buf->buffer,
+					       ctxt->input->buf->raw);
+		    if (nbchars < 0) {
+			fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
+			return(-1);
+		    }
+
+		    /*
+		     * Conversion succeeded, get rid of the old buffer
+		     */
+		    if ((ctxt->input->free != NULL) &&
+		        (ctxt->input->base != NULL))
+			ctxt->input->free((xmlChar *) ctxt->input->base);
+		    ctxt->input->base =
+		    ctxt->input->cur = ctxt->input->buf->buffer->content;
+		}
+	    }
+	} else {
+	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+	        ctxt->sax->error(ctxt->userData,
+		                 "xmlSwitchEncoding : no input\n");
+	    return(-1);
+	}
+	/*
+	 * The parsing is now done in UTF8 natively
+	 */
+	ctxt->charset = XML_CHAR_ENCODING_UTF8;
+    } else 
+	return(-1);
+    return(0);
+
+}
+
 /************************************************************************
 *									*
 *	Commodity functions to handle entities processing		*
@ -1705,7 +2010,7 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
 	return(NULL);

    URI = xmlStrdup((xmlChar *) filename);
-    directory = xmlParserGetDirectory(URI);
+    directory = xmlParserGetDirectory((const char *) URI);

    inputStream = xmlNewInputStream(ctxt);
    if (inputStream == NULL) {
@ -1714,7 +2019,7 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
 	return(NULL);
    }

-    inputStream->filename = URI;
+    inputStream->filename = (const char *) URI;
    inputStream->directory = directory;
    inputStream->buf = buf;

--- a/tree.c
+++ b/tree.c
@ -2637,6 +2637,9 @@ xmlNodeSetLang(xmlNodePtr cur, const xmlChar *lang) {
        case XML_PI_NODE:
        case XML_ENTITY_REF_NODE:
        case XML_ENTITY_NODE:
+#ifdef LIBXML_SGML_ENABLED
+	case XML_SGML_DOCUMENT_NODE:
+#endif
 	    return;
        case XML_ELEMENT_NODE:
        case XML_ATTRIBUTE_NODE:
@ -2719,6 +2722,9 @@ xmlNodeSetName(xmlNodePtr cur, const xmlChar *name) {
        case XML_DOCUMENT_FRAG_NODE:
        case XML_NOTATION_NODE:
        case XML_HTML_DOCUMENT_NODE:
+#ifdef LIBXML_SGML_ENABLED
+	case XML_SGML_DOCUMENT_NODE:
+#endif
 	    return;
        case XML_ELEMENT_NODE:
        case XML_ATTRIBUTE_NODE:
@ -2845,6 +2851,9 @@ xmlNodeGetContent(xmlNodePtr cur) {
        case XML_DOCUMENT_TYPE_NODE:
        case XML_NOTATION_NODE:
        case XML_DTD_NODE:
+#ifdef LIBXML_SGML_ENABLED
+	case XML_SGML_DOCUMENT_NODE:
+#endif
 	    return(NULL);
        case XML_ELEMENT_DECL:
 	    /* TODO !!! */
@ -2930,6 +2939,9 @@ xmlNodeSetContent(xmlNodePtr cur, const xmlChar *content) {
        case XML_DOCUMENT_NODE:
        case XML_HTML_DOCUMENT_NODE:
        case XML_DOCUMENT_TYPE_NODE:
+#ifdef LIBXML_SGML_ENABLED
+	case XML_SGML_DOCUMENT_NODE:
+#endif
 	    break;
        case XML_NOTATION_NODE:
 	    break;
@ -3012,6 +3024,9 @@ xmlNodeSetContentLen(xmlNodePtr cur, const xmlChar *content, int len) {
        case XML_DTD_NODE:
        case XML_HTML_DOCUMENT_NODE:
        case XML_DOCUMENT_TYPE_NODE:
+#ifdef LIBXML_SGML_ENABLED
+	case XML_SGML_DOCUMENT_NODE:
+#endif
 	    break;
        case XML_ELEMENT_DECL:
 	    /* TODO !!! */
@ -3096,6 +3111,9 @@ xmlNodeAddContentLen(xmlNodePtr cur, const xmlChar *content, int len) {
        case XML_DTD_NODE:
        case XML_HTML_DOCUMENT_NODE:
        case XML_DOCUMENT_TYPE_NODE:
+#ifdef LIBXML_SGML_ENABLED
+	case XML_SGML_DOCUMENT_NODE:
+#endif
 	    break;
        case XML_ELEMENT_DECL:
        case XML_ATTRIBUTE_DECL:
--- a/valid.c
+++ b/valid.c
@ -3561,6 +3561,9 @@ xmlSprintfElementChilds(char *buf, xmlNodePtr node, int glob) {
 		 break;
            case XML_ATTRIBUTE_NODE:
            case XML_DOCUMENT_NODE:
+#ifdef LIBXML_SGML_ENABLED
+	    case XML_SGML_DOCUMENT_NODE:
+#endif
 	    case XML_HTML_DOCUMENT_NODE:
            case XML_DOCUMENT_TYPE_NODE:
            case XML_DOCUMENT_FRAG_NODE:
--- a/xpath.c
+++ b/xpath.c
@ -1504,6 +1504,9 @@ xmlXPathNextChild(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
            case XML_DOCUMENT_TYPE_NODE:
            case XML_DOCUMENT_FRAG_NODE:
            case XML_HTML_DOCUMENT_NODE:
+#ifdef LIBXML_SGML_ENABLED
+	    case XML_SGML_DOCUMENT_NODE:
+#endif
 		return(((xmlDocPtr) ctxt->context->node)->children);
 	    case XML_ELEMENT_DECL:
 	    case XML_ATTRIBUTE_DECL:
@ -1632,6 +1635,9 @@ xmlXPathNextParent(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
            case XML_DOCUMENT_TYPE_NODE:
            case XML_DOCUMENT_FRAG_NODE:
            case XML_HTML_DOCUMENT_NODE:
+#ifdef LIBXML_SGML_ENABLED
+	    case XML_SGML_DOCUMENT_NODE:
+#endif
                return(NULL);
 	}
    }
@ -1686,6 +1692,9 @@ xmlXPathNextAncestor(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
            case XML_DOCUMENT_TYPE_NODE:
            case XML_DOCUMENT_FRAG_NODE:
            case XML_HTML_DOCUMENT_NODE:
+#ifdef LIBXML_SGML_ENABLED
+	    case XML_SGML_DOCUMENT_NODE:
+#endif
                return(NULL);
 	}
 	return(NULL);
@ -1717,6 +1726,9 @@ xmlXPathNextAncestor(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
 	case XML_DOCUMENT_TYPE_NODE:
 	case XML_DOCUMENT_FRAG_NODE:
 	case XML_HTML_DOCUMENT_NODE:
+#ifdef LIBXML_SGML_ENABLED
+	case XML_SGML_DOCUMENT_NODE:
+#endif
 	    return(NULL);
    }
    return(NULL);