Better entities and char ref encoding, error msg formatting, Daniel.

This commit is contained in:
Daniel Veillard 1998-11-16 01:04:26 +00:00
parent 242590ee5d
commit 8cc0d1f889
3 changed files with 49 additions and 11 deletions

View File

@ -1,3 +1,8 @@
Sun Nov 15 19:59:47 EST 1998 Daniel Veillard <Daniel.Veillard@w3.org>
* parser.c, entities.c: improve entities and char ref encoding,
and cleanups of error messages.
Fri Nov 13 13:03:10 EST 1998 Daniel Veillard <Daniel.Veillard@w3.org>
* parser.c, entities.c: simple bug hunting done during rpm2html and

View File

@ -248,6 +248,15 @@ xmlEntityPtr xmlGetDocEntity(xmlDocPtr doc, const CHAR *name) {
return(NULL);
}
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *            | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * IS_CHAR(c) evaluates to non-zero when the code point c is a legal XML
 * character according to the production above, and to zero otherwise.
 * The surrogate block (#xD800-#xDFFF), #xFFFE, #xFFFF and every value
 * above #x10FFFF are rejected, as the production requires.
 *
 * The argument is evaluated several times: do not pass an expression
 * with side effects.
 */
#define IS_CHAR(c)                                                      \
    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) ||                 \
     (((c) >= 0x20) && ((c) <= 0xD7FF)) ||                              \
     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||                            \
     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
/*
* xmlEncodeEntities : do a global encoding of a string, replacing the
* predefined entities and non ASCII values with their
@ -255,6 +264,7 @@ xmlEntityPtr xmlGetDocEntity(xmlDocPtr doc, const CHAR *name) {
* TODO !!!! Once moved to UTF-8 internal encoding, the encoding of non-ASCII
* characters becomes erroneous.
*/
CHAR *xmlEncodeEntities(xmlDocPtr doc, const CHAR *input) {
const CHAR *cur = input;
CHAR *out = buffer;
@ -310,6 +320,12 @@ CHAR *xmlEncodeEntities(xmlDocPtr doc, const CHAR *input) {
*out++ = 'o';
*out++ = 's';
*out++ = ';';
} else if (((*cur >= 0x20) && (*cur < 0x80)) ||
(*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
/*
* default case, just copy !
*/
*out++ = *cur;
#ifndef USE_UTF_8
} else if ((sizeof(CHAR) == 1) && (*cur >= 0x80)) {
char buf[10], *ptr;
@ -321,12 +337,26 @@ CHAR *xmlEncodeEntities(xmlDocPtr doc, const CHAR *input) {
ptr = buf;
while (*ptr != 0) *out++ = *ptr++;
#endif
} else {
/*
* default case, just copy !
*/
*out++ = *cur;
} else if (IS_CHAR(*cur)) {
char buf[10], *ptr;
#ifdef HAVE_SNPRINTF
snprintf(buf, 9, "&#%d;", *cur);
#else
sprintf(buf, "&#%d;", *cur);
#endif
ptr = buf;
while (*ptr != 0) *out++ = *ptr++;
}
#if 0
else {
/*
* default case, this is not a valid char !
* Skip it...
*/
fprintf(stderr, "xmlEncodeEntities: invalid char %d\n", (int) *cur);
}
#endif
cur++;
}
*out++ = 0;

View File

@ -1080,7 +1080,7 @@ xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
if (CUR != '"') {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt, "String not closed\"%.50s\n", q);
ctxt->sax->error(ctxt, "String not closed \"%.50s\"\n", q);
} else {
ret = xmlStrndup(q, CUR_PTR - q);
NEXT;
@ -1091,7 +1091,7 @@ xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
if (CUR != '\'') {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt, "String not closed\"%.50s\n", q);
ctxt->sax->error(ctxt, "String not closed \"%.50s\"\n", q);
} else {
ret = xmlStrndup(q, CUR_PTR - q);
NEXT;
@ -2253,7 +2253,8 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
val = val * 16 + (CUR - 'A') + 10;
else {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt, "xmlParseCharRef: invalid value\n");
ctxt->sax->error(ctxt,
"xmlParseCharRef: invalid hexa value\n");
val = 0;
break;
}
@ -2268,7 +2269,8 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
val = val * 10 + (CUR - '0');
else {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt, "xmlParseCharRef: invalid value\n");
ctxt->sax->error(ctxt,
"xmlParseCharRef: invalid decimal value\n");
val = 0;
break;
}
@ -2289,7 +2291,8 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
return(xmlStrndup(buf, 1));
} else {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt, "xmlParseCharRef: invalid value");
ctxt->sax->error(ctxt, "xmlParseCharRef: invalid CHAR value %d\n",
val);
}
return(NULL);
}
@ -3932,7 +3935,7 @@ xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
if ( tmp_buffer == NULL ) {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt, "Out of memory");
ctxt->sax->error(ctxt, "Out of memory\n");
return;
}
ctxt->node_seq.buffer = tmp_buffer;