applied UTF-16 encoding handling patch provided by Mark Itzcovitz; more cleanup and fixes for UTF-16 without iconv support

* encoding.c: applied UTF-16 encoding handling patch provided by
  Mark Itzcovitz
* encoding.c parser.c: more cleanup and fixes for UTF-16 when
  not having iconv support.
Daniel
This commit is contained in:
Daniel Veillard 2003-08-14 12:19:54 +00:00
parent ccf996fca0
commit ab1ae3a768
4 changed files with 34 additions and 19 deletions

View File

@ -1,3 +1,10 @@
Thu Aug 14 14:13:43 CEST 2003 Daniel Veillard <daniel@veillard.com>
* encoding.c: applied UTF-16 encoding handling patch provided by
Mark Itzcovitz
* encoding.c parser.c: more cleanup and fixes for UTF-16 when
not having iconv support.
Thu Aug 14 03:19:08 CEST 2003 Daniel Veillard <daniel@veillard.com>
* Makefile.am configure.in example/Makefile.am libxml.h nanoftp.c
@ -8,7 +15,7 @@ Thu Aug 14 02:28:36 CEST 2003 Daniel Veillard <daniel@veillard.com>
* parser.c: fixed the serious CPU usage problem reported by
Grant Goodale
* HTMLparser.c: applied patch from Oliver Kidman about a free
* HTMLparser.c: applied patch from Oliver Stoeneberg about a free
missing in htmlSAXParseDoc
Tue Aug 12 22:48:10 HKT 2003 William Brack <wbrack@mmm.com.hk>

View File

@ -875,6 +875,7 @@ UTF8ToUTF16LE(unsigned char* outb, int *outlen,
{
unsigned short* out = (unsigned short*) outb;
const unsigned char* processed = in;
const unsigned char *const instart = in;
unsigned short* outstart= out;
unsigned short* outend;
const unsigned char* inend= in+*inlen;
@ -909,7 +910,7 @@ UTF8ToUTF16LE(unsigned char* outb, int *outlen,
else if (d < 0xC0) {
/* trailing byte in leading position */
*outlen = (out - outstart) * 2;
*inlen = processed - in;
*inlen = processed - instart;
return(-2);
} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
@ -917,7 +918,7 @@ UTF8ToUTF16LE(unsigned char* outb, int *outlen,
else {
/* no chance for this in UTF-16 */
*outlen = (out - outstart) * 2;
*inlen = processed - in;
*inlen = processed - instart;
return(-2);
}
@ -971,7 +972,7 @@ UTF8ToUTF16LE(unsigned char* outb, int *outlen,
processed = in;
}
*outlen = (out - outstart) * 2;
*inlen = processed - in;
*inlen = processed - instart;
return(0);
}
@ -1086,6 +1087,7 @@ UTF8ToUTF16BE(unsigned char* outb, int *outlen,
{
unsigned short* out = (unsigned short*) outb;
const unsigned char* processed = in;
const unsigned char *const instart = in;
unsigned short* outstart= out;
unsigned short* outend;
const unsigned char* inend= in+*inlen;
@ -1120,7 +1122,7 @@ UTF8ToUTF16BE(unsigned char* outb, int *outlen,
else if (d < 0xC0) {
/* trailing byte in leading position */
*outlen = out - outstart;
*inlen = processed - in;
*inlen = processed - instart;
return(-2);
} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
@ -1128,7 +1130,7 @@ UTF8ToUTF16BE(unsigned char* outb, int *outlen,
else {
/* no chance for this in UTF-16 */
*outlen = out - outstart;
*inlen = processed - in;
*inlen = processed - instart;
return(-2);
}
@ -1179,7 +1181,7 @@ UTF8ToUTF16BE(unsigned char* outb, int *outlen,
processed = in;
}
*outlen = (out - outstart) * 2;
*inlen = processed - in;
*inlen = processed - instart;
return(0);
}
@ -1962,6 +1964,14 @@ xmlFindCharEncodingHandler(const char *name) {
}
}
/*
* If nothing was found and the name is "UTF-16" (or "UTF16"), fall
* back to the little-endian handler.
*/
if ((xmlStrEqual(BAD_CAST upper, BAD_CAST "UTF-16")) ||
(xmlStrEqual(BAD_CAST upper, BAD_CAST "UTF16")))
return(xmlUTF16LEHandler);
return(NULL);
}

View File

@ -295,7 +295,7 @@ void xmlParseElement (xmlParserCtxtPtr ctxt);
xmlChar * xmlParseVersionNum (xmlParserCtxtPtr ctxt);
xmlChar * xmlParseVersionInfo (xmlParserCtxtPtr ctxt);
xmlChar * xmlParseEncName (xmlParserCtxtPtr ctxt);
xmlChar * xmlParseEncodingDecl (xmlParserCtxtPtr ctxt);
const xmlChar * xmlParseEncodingDecl (xmlParserCtxtPtr ctxt);
int xmlParseSDDecl (xmlParserCtxtPtr ctxt);
void xmlParseXMLDecl (xmlParserCtxtPtr ctxt);
void xmlParseTextDecl (xmlParserCtxtPtr ctxt);

View File

@ -7518,7 +7518,7 @@ xmlParseEncName(xmlParserCtxtPtr ctxt) {
* Returns the encoding value or NULL
*/
xmlChar *
const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
xmlChar *encoding = NULL;
const xmlChar *q;
@ -7582,23 +7582,21 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
if ((encoding != NULL) &&
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
(!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
if (ctxt->input->encoding != NULL)
xmlFree((xmlChar *) ctxt->input->encoding);
ctxt->input->encoding = encoding;
encoding = NULL;
if (ctxt->encoding != NULL)
xmlFree((xmlChar *) ctxt->encoding);
ctxt->encoding = encoding;
}
/*
* UTF-8 encoding is handled natively
*/
if ((encoding != NULL) &&
else if ((encoding != NULL) &&
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
(!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
if (ctxt->input->encoding != NULL)
xmlFree((xmlChar *) ctxt->input->encoding);
ctxt->input->encoding = encoding;
encoding = NULL;
if (ctxt->encoding != NULL)
xmlFree((xmlChar *) ctxt->encoding);
ctxt->encoding = encoding;
}
if (encoding != NULL) {
else if (encoding != NULL) {
xmlCharEncodingHandlerPtr handler;
if (ctxt->input->encoding != NULL)