/* * parser.c : an XML 1.0 non-verifying parser * * See Copyright for the status of this software. * * Daniel.Veillard@w3.org */ #ifdef WIN32 #include "win32config.h" #else #include "config.h" #endif #include #include /* for memset() only */ #ifdef HAVE_CTYPE_H #include #endif #ifdef HAVE_STDLIB_H #include #endif #ifdef HAVE_SYS_STAT_H #include #endif #ifdef HAVE_FCNTL_H #include #endif #ifdef HAVE_UNISTD_H #include #endif #ifdef HAVE_ZLIB_H #include #endif #include #include #include #include #include #include #include #include #include #include "xml-error.h" #define XML_PARSER_BIG_BUFFER_SIZE 1000 #define XML_PARSER_BUFFER_SIZE 100 int xmlGetWarningsDefaultValue = 1; /* * List of XML prefixed PI allowed by W3C specs */ const char *xmlW3CPIs[] = { "xml-stylesheet", NULL }; void xmlParserHandleReference(xmlParserCtxtPtr ctxt); void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt); xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str); /* * Version handling */ const char *xmlParserVersion = LIBXML_VERSION_STRING; /* * xmlCheckVersion: * @version: the include version number * * check the compiled lib version against the include one. 
* This can warn or immediately kill the application */ void xmlCheckVersion(int version) { int myversion = (int) LIBXML_VERSION; if ((myversion / 10000) != (version / 10000)) { fprintf(stderr, "Fatal: program compiled against libxml %d using libxml %d\n", (version / 10000), (myversion / 10000)); exit(1); } if ((myversion / 100) < (version / 100)) { fprintf(stderr, "Warning: program compiled against libxml %d using older %d\n", (version / 100), (myversion / 100)); } } /************************************************************************ * * * Input handling functions for progressive parsing * * * ************************************************************************/ /* #define DEBUG_INPUT */ /* #define DEBUG_STACK */ /* #define DEBUG_PUSH */ #define INPUT_CHUNK 250 /* we need to keep enough input to show errors in context */ #define LINE_LEN 80 #ifdef DEBUG_INPUT #define CHECK_BUFFER(in) check_buffer(in) void check_buffer(xmlParserInputPtr in) { if (in->base != in->buf->buffer->content) { fprintf(stderr, "xmlParserInput: base mismatch problem\n"); } if (in->cur < in->base) { fprintf(stderr, "xmlParserInput: cur < base problem\n"); } if (in->cur > in->base + in->buf->buffer->use) { fprintf(stderr, "xmlParserInput: cur > base + use problem\n"); } fprintf(stderr,"buffer %x : content %x, cur %d, use %d, size %d\n", (int) in, (int) in->buf->buffer->content, in->cur - in->base, in->buf->buffer->use, in->buf->buffer->size); } #else #define CHECK_BUFFER(in) #endif /** * xmlParserInputRead: * @in: an XML parser input * @len: an indicative size for the lookahead * * This function refresh the input for the parser. 
It doesn't try to * preserve pointers to the input buffer, and discard already read data * * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the * end of this entity */ int xmlParserInputRead(xmlParserInputPtr in, int len) { int ret; int used; int index; #ifdef DEBUG_INPUT fprintf(stderr, "Read\n"); #endif if (in->buf == NULL) return(-1); if (in->base == NULL) return(-1); if (in->cur == NULL) return(-1); if (in->buf->buffer == NULL) return(-1); CHECK_BUFFER(in); used = in->cur - in->buf->buffer->content; ret = xmlBufferShrink(in->buf->buffer, used); if (ret > 0) { in->cur -= ret; in->consumed += ret; } ret = xmlParserInputBufferRead(in->buf, len); if (in->base != in->buf->buffer->content) { /* * the buffer has been realloced */ index = in->cur - in->base; in->base = in->buf->buffer->content; in->cur = &in->buf->buffer->content[index]; } CHECK_BUFFER(in); return(ret); } /** * xmlParserInputGrow: * @in: an XML parser input * @len: an indicative size for the lookahead * * This function increase the input for the parser. It tries to * preserve pointers to the input buffer, and keep already read data * * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the * end of this entity */ int xmlParserInputGrow(xmlParserInputPtr in, int len) { int ret; int index; #ifdef DEBUG_INPUT fprintf(stderr, "Grow\n"); #endif if (in->buf == NULL) return(-1); if (in->base == NULL) return(-1); if (in->cur == NULL) return(-1); if (in->buf->buffer == NULL) return(-1); CHECK_BUFFER(in); index = in->cur - in->base; if (in->buf->buffer->use > (unsigned int) index + INPUT_CHUNK) { CHECK_BUFFER(in); return(0); } if (in->buf->readcallback != NULL) ret = xmlParserInputBufferGrow(in->buf, len); else return(0); /* * NOTE : in->base may be a "dandling" i.e. freed pointer in this * block, but we use it really as an integer to do some * pointer arithmetic. Insure will raise it as a bug but in * that specific case, that's not ! 
*/ if (in->base != in->buf->buffer->content) { /* * the buffer has been realloced */ index = in->cur - in->base; in->base = in->buf->buffer->content; in->cur = &in->buf->buffer->content[index]; } CHECK_BUFFER(in); return(ret); } /** * xmlParserInputShrink: * @in: an XML parser input * * This function removes used input for the parser. */ void xmlParserInputShrink(xmlParserInputPtr in) { int used; int ret; int index; #ifdef DEBUG_INPUT fprintf(stderr, "Shrink\n"); #endif if (in->buf == NULL) return; if (in->base == NULL) return; if (in->cur == NULL) return; if (in->buf->buffer == NULL) return; CHECK_BUFFER(in); used = in->cur - in->buf->buffer->content; if (used > INPUT_CHUNK) { ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN); if (ret > 0) { in->cur -= ret; in->consumed += ret; } } CHECK_BUFFER(in); if (in->buf->buffer->use > INPUT_CHUNK) { return; } xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK); if (in->base != in->buf->buffer->content) { /* * the buffer has been realloced */ index = in->cur - in->base; in->base = in->buf->buffer->content; in->cur = &in->buf->buffer->content[index]; } CHECK_BUFFER(in); } /************************************************************************ * * * Parser stacks related functions and macros * * * ************************************************************************/ int xmlSubstituteEntitiesDefaultValue = 0; int xmlDoValidityCheckingDefaultValue = 0; int xmlKeepBlanksDefaultValue = 1; xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str); /* * Generic function for accessing stacks in the Parser Context */ #define PUSH_AND_POP(scope, type, name) \ scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \ if (ctxt->name##Nr >= ctxt->name##Max) { \ ctxt->name##Max *= 2; \ ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab, \ ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \ if (ctxt->name##Tab == NULL) { \ fprintf(stderr, "realloc failed !\n"); \ return(0); \ } \ } \ 
ctxt->name##Tab[ctxt->name##Nr] = value; \ ctxt->name = value; \ return(ctxt->name##Nr++); \ } \ scope type name##Pop(xmlParserCtxtPtr ctxt) { \ type ret; \ if (ctxt->name##Nr <= 0) return(0); \ ctxt->name##Nr--; \ if (ctxt->name##Nr > 0) \ ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \ else \ ctxt->name = NULL; \ ret = ctxt->name##Tab[ctxt->name##Nr]; \ ctxt->name##Tab[ctxt->name##Nr] = 0; \ return(ret); \ } \ PUSH_AND_POP(extern, xmlParserInputPtr, input) PUSH_AND_POP(extern, xmlNodePtr, node) PUSH_AND_POP(extern, xmlChar*, name) int spacePush(xmlParserCtxtPtr ctxt, int val) { if (ctxt->spaceNr >= ctxt->spaceMax) { ctxt->spaceMax *= 2; ctxt->spaceTab = (void *) xmlRealloc(ctxt->spaceTab, ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); if (ctxt->spaceTab == NULL) { fprintf(stderr, "realloc failed !\n"); return(0); } } ctxt->spaceTab[ctxt->spaceNr] = val; ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; return(ctxt->spaceNr++); } int spacePop(xmlParserCtxtPtr ctxt) { int ret; if (ctxt->spaceNr <= 0) return(0); ctxt->spaceNr--; if (ctxt->spaceNr > 0) ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; else ctxt->space = NULL; ret = ctxt->spaceTab[ctxt->spaceNr]; ctxt->spaceTab[ctxt->spaceNr] = -1; return(ret); } /* * Macros for accessing the content. Those should be used only by the parser, * and not exported. * * Dirty macros, i.e. one need to make assumption on the context to use them * * CUR_PTR return the current pointer to the xmlChar to be parsed. * To be used with extreme caution since operations consuming * characters may move the input buffer to a different location ! * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled * in ISO-Latin or UTF-8. * This should be used internally by the parser * only to compare to ASCII values otherwise it would break when * running with UTF-8 encoding. * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only * to compare on ASCII based substring. 
*   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
 *   CUR_CHAR Return the current char as an int as well as its length.
 *
 * All of these macros expect a variable named ctxt (the parser context)
 * to be in scope at the point of use.
 *
 * NOTE(review): SKIP, SHRINK, GROW, NEXTL and COPY_BUF expand to several
 * statements that are not wrapped in do { } while (0), and SKIP_BLANKS,
 * NEXT, CUR_CHAR, CUR_SCHAR, NEXTL and COPY_BUF already end with a ';'.
 * Using any of them as the body of an unbraced if/else/loop will not do
 * what it looks like.
 */

/* -1 while a token is pending in ctxt->token, else the current input byte */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* the pending token if there is one, else the current input byte */
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
/* the byte located val positions after the current one (token ignored) */
#define NXT(val) ctxt->input->cur[(val)]
/* the raw input pointer itself */
#define CUR_PTR ctxt->input->cur

/*
 * Advance by val bytes, then handle a '%' or '&' found at the new
 * position and pop the input if it is exhausted and cannot be grown.
 */
#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val);	\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt)

/* Discard consumed input, popping the input if nothing more is available */
#define SHRINK xmlParserInputShrink(ctxt->input);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt)

/* Ask for more lookahead, popping the input if nothing more is available */
#define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt)

/* function-backed helpers; note the trailing ';' in the expansion */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt);

#define NEXT xmlNextChar(ctxt);

/*
 * Advance by l bytes (a length previously obtained for the current
 * char), updating line/col from the byte being left, clearing any
 * pending token and handling a '%' or '&' at the new position.
 */
#define NEXTL(l)							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);

/* current char from the parser input, its byte length stored in l */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l);
/* current char from the string s, its byte length stored in l */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l);

/* append the char v of byte length l to buffer b at index i, advancing i */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyChar(l,&b[i],v);

/**
 * xmlNextChar:
 * @ctxt:  the XML parser context
 *
 * Skip to the next char input char.
*/ void xmlNextChar(xmlParserCtxtPtr ctxt) { if (ctxt->instate == XML_PARSER_EOF) return; /* * TODO: 2.11 End-of-Line Handling * the literal two-character sequence "#xD#xA" or a standalone * literal #xD, an XML processor must pass to the application * the single character #xA. */ if (ctxt->token != 0) ctxt->token = 0; else { if ((*ctxt->input->cur == 0) && (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) && (ctxt->instate != XML_PARSER_COMMENT)) { /* * If we are at the end of the current entity and * the context allows it, we pop consumed entities * automatically. * TODO: the auto closing should be blocked in other cases */ xmlPopInput(ctxt); } else { if (*(ctxt->input->cur) == '\n') { ctxt->input->line++; ctxt->input->col = 1; } else ctxt->input->col++; if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { /* * We are supposed to handle UTF8, check it's valid * From rfc2044: encoding of the Unicode values on UTF-8: * * UCS-4 range (hex.) UTF-8 octet sequence (binary) * 0000 0000-0000 007F 0xxxxxxx * 0000 0080-0000 07FF 110xxxxx 10xxxxxx * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx * * Check for the 0x110000 limit too */ const unsigned char *cur = ctxt->input->cur; unsigned char c; c = *cur; if (c & 0x80) { if (cur[1] == 0) xmlParserInputGrow(ctxt->input, INPUT_CHUNK); if ((cur[1] & 0xc0) != 0x80) goto encoding_error; if ((c & 0xe0) == 0xe0) { unsigned int val; if (cur[2] == 0) xmlParserInputGrow(ctxt->input, INPUT_CHUNK); if ((cur[2] & 0xc0) != 0x80) goto encoding_error; if ((c & 0xf0) == 0xf0) { if (cur[3] == 0) xmlParserInputGrow(ctxt->input, INPUT_CHUNK); if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80)) goto encoding_error; /* 4-byte code */ ctxt->input->cur += 4; val = (cur[0] & 0x7) << 18; val |= (cur[1] & 0x3f) << 12; val |= (cur[2] & 0x3f) << 6; val |= cur[3] & 0x3f; } else { /* 3-byte code */ ctxt->input->cur += 3; val = (cur[0] & 0xf) << 12; val |= (cur[1] & 0x3f) << 6; val |= cur[2] & 0x3f; } if (((val > 0xd7ff) && (val < 0xe000)) || ((val > 
0xfffd) && (val < 0x10000)) || (val >= 0x110000)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Char 0x%X out of allowed range\n", val); ctxt->errNo = XML_ERR_INVALID_ENCODING; ctxt->wellFormed = 0; ctxt->disableSAX = 1; } } else /* 2-byte code */ ctxt->input->cur += 2; } else /* 1-byte code */ ctxt->input->cur++; } else { /* * Assume it's a fixed lenght encoding (1) with * a compatibke encoding for the ASCII set, since * XML constructs only use < 128 chars */ ctxt->input->cur++; } ctxt->nbChars++; if (*ctxt->input->cur == 0) xmlParserInputGrow(ctxt->input, INPUT_CHUNK); } } if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); if ((*ctxt->input->cur == 0) && (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) xmlPopInput(ctxt); return; encoding_error: /* * If we detect an UTF8 error that probably mean that the * input encoding didn't get properly advertized in the * declaration header. Report the error and switch the encoding * to ISO-Latin-1 (if you don't like this policy, just declare the * encoding !) */ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { ctxt->sax->error(ctxt->userData, "Input is not proper UTF-8, indicate encoding !\n"); ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", ctxt->input->cur[0], ctxt->input->cur[1], ctxt->input->cur[2], ctxt->input->cur[3]); } ctxt->errNo = XML_ERR_INVALID_ENCODING; ctxt->charset = XML_CHAR_ENCODING_8859_1; ctxt->input->cur++; return; } /** * xmlCurrentChar: * @ctxt: the XML parser context * @len: pointer to the length of the char read * * The current char value, if using UTF-8 this may actaully span multiple * bytes in the input buffer. 
Implement the end of line normalization: * 2.11 End-of-Line Handling * Wherever an external parsed entity or the literal entity value * of an internal parsed entity contains either the literal two-character * sequence "#xD#xA" or a standalone literal #xD, an XML processor * must pass to the application the single character #xA. * This behavior can conveniently be produced by normalizing all * line breaks to #xA on input, before parsing.) * * Returns the current char value and its lenght */ int xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { if (ctxt->instate == XML_PARSER_EOF) return(0); if (ctxt->token != 0) { *len = 0; return(ctxt->token); } if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { /* * We are supposed to handle UTF8, check it's valid * From rfc2044: encoding of the Unicode values on UTF-8: * * UCS-4 range (hex.) UTF-8 octet sequence (binary) * 0000 0000-0000 007F 0xxxxxxx * 0000 0080-0000 07FF 110xxxxx 10xxxxxx * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx * * Check for the 0x110000 limit too */ const unsigned char *cur = ctxt->input->cur; unsigned char c; unsigned int val; c = *cur; if (c & 0x80) { if (cur[1] == 0) xmlParserInputGrow(ctxt->input, INPUT_CHUNK); if ((cur[1] & 0xc0) != 0x80) goto encoding_error; if ((c & 0xe0) == 0xe0) { if (cur[2] == 0) xmlParserInputGrow(ctxt->input, INPUT_CHUNK); if ((cur[2] & 0xc0) != 0x80) goto encoding_error; if ((c & 0xf0) == 0xf0) { if (cur[3] == 0) xmlParserInputGrow(ctxt->input, INPUT_CHUNK); if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80)) goto encoding_error; /* 4-byte code */ *len = 4; val = (cur[0] & 0x7) << 18; val |= (cur[1] & 0x3f) << 12; val |= (cur[2] & 0x3f) << 6; val |= cur[3] & 0x3f; } else { /* 3-byte code */ *len = 3; val = (cur[0] & 0xf) << 12; val |= (cur[1] & 0x3f) << 6; val |= cur[2] & 0x3f; } } else { /* 2-byte code */ *len = 2; val = (cur[0] & 0x1f) << 6; val |= cur[1] & 0x3f; } if (!IS_CHAR(val)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 
ctxt->sax->error(ctxt->userData, "Char 0x%X out of allowed range\n", val); ctxt->errNo = XML_ERR_INVALID_ENCODING; ctxt->wellFormed = 0; ctxt->disableSAX = 1; } return(val); } else { /* 1-byte code */ *len = 1; if (*ctxt->input->cur == 0xD) { if (ctxt->input->cur[1] == 0xA) { ctxt->nbChars++; ctxt->input->cur++; } return(0xA); } return((int) *ctxt->input->cur); } } /* * Assume it's a fixed lenght encoding (1) with * a compatibke encoding for the ASCII set, since * XML constructs only use < 128 chars */ *len = 1; if (*ctxt->input->cur == 0xD) { if (ctxt->input->cur[1] == 0xA) { ctxt->nbChars++; ctxt->input->cur++; } return(0xA); } return((int) *ctxt->input->cur); encoding_error: /* * If we detect an UTF8 error that probably mean that the * input encoding didn't get properly advertized in the * declaration header. Report the error and switch the encoding * to ISO-Latin-1 (if you don't like this policy, just declare the * encoding !) */ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { ctxt->sax->error(ctxt->userData, "Input is not proper UTF-8, indicate encoding !\n"); ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", ctxt->input->cur[0], ctxt->input->cur[1], ctxt->input->cur[2], ctxt->input->cur[3]); } ctxt->errNo = XML_ERR_INVALID_ENCODING; ctxt->charset = XML_CHAR_ENCODING_8859_1; *len = 1; return((int) *ctxt->input->cur); } /** * xmlStringCurrentChar: * @ctxt: the XML parser context * @cur: pointer to the beginning of the char * @len: pointer to the length of the char read * * The current char value, if using UTF-8 this may actaully span multiple * bytes in the input buffer. * * Returns the current char value and its lenght */ int xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) { if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { /* * We are supposed to handle UTF8, check it's valid * From rfc2044: encoding of the Unicode values on UTF-8: * * UCS-4 range (hex.) 
UTF-8 octet sequence (binary) * 0000 0000-0000 007F 0xxxxxxx * 0000 0080-0000 07FF 110xxxxx 10xxxxxx * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx * * Check for the 0x110000 limit too */ unsigned char c; unsigned int val; c = *cur; if (c & 0x80) { if ((cur[1] & 0xc0) != 0x80) goto encoding_error; if ((c & 0xe0) == 0xe0) { if ((cur[2] & 0xc0) != 0x80) goto encoding_error; if ((c & 0xf0) == 0xf0) { if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80)) goto encoding_error; /* 4-byte code */ *len = 4; val = (cur[0] & 0x7) << 18; val |= (cur[1] & 0x3f) << 12; val |= (cur[2] & 0x3f) << 6; val |= cur[3] & 0x3f; } else { /* 3-byte code */ *len = 3; val = (cur[0] & 0xf) << 12; val |= (cur[1] & 0x3f) << 6; val |= cur[2] & 0x3f; } } else { /* 2-byte code */ *len = 2; val = (cur[0] & 0x1f) << 6; val |= cur[2] & 0x3f; } if (!IS_CHAR(val)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Char 0x%X out of allowed range\n", val); ctxt->errNo = XML_ERR_INVALID_ENCODING; ctxt->wellFormed = 0; ctxt->disableSAX = 1; } return(val); } else { /* 1-byte code */ *len = 1; return((int) *cur); } } /* * Assume it's a fixed lenght encoding (1) with * a compatibke encoding for the ASCII set, since * XML constructs only use < 128 chars */ *len = 1; return((int) *cur); encoding_error: /* * If we detect an UTF8 error that probably mean that the * input encoding didn't get properly advertized in the * declaration header. Report the error and switch the encoding * to ISO-Latin-1 (if you don't like this policy, just declare the * encoding !) 
*/ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { ctxt->sax->error(ctxt->userData, "Input is not proper UTF-8, indicate encoding !\n"); ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", ctxt->input->cur[0], ctxt->input->cur[1], ctxt->input->cur[2], ctxt->input->cur[3]); } ctxt->errNo = XML_ERR_INVALID_ENCODING; *len = 1; return((int) *cur); } /** * xmlCopyChar: * @len: pointer to the length of the char read (or zero) * @array: pointer to an arry of xmlChar * @val: the char value * * append the char value in the array * * Returns the number of xmlChar written */ int xmlCopyChar(int len, xmlChar *out, int val) { /* * We are supposed to handle UTF8, check it's valid * From rfc2044: encoding of the Unicode values on UTF-8: * * UCS-4 range (hex.) UTF-8 octet sequence (binary) * 0000 0000-0000 007F 0xxxxxxx * 0000 0080-0000 07FF 110xxxxx 10xxxxxx * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx */ if (len == 0) { if (val < 0) len = 0; else if (val < 0x80) len = 1; else if (val < 0x800) len = 2; else if (val < 0x10000) len = 3; else if (val < 0x110000) len = 4; if (len == 0) { fprintf(stderr, "Internal error, xmlCopyChar 0x%X out of bound\n", val); return(0); } } if (len > 1) { int bits; if (val < 0x80) { *out++= val; bits= -6; } else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; } else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; } else { *out++= (val >> 18) | 0xF0; bits= 12; } for ( ; bits >= 0; bits-= 6) *out++= ((val >> bits) & 0x3F) | 0x80 ; return(len); } *out = (xmlChar) val; return(1); } /** * xmlSkipBlankChars: * @ctxt: the XML parser context * * skip all blanks character found at that point in the input streams. * It pops up finished entities in the process if allowable at that point. 
* * Returns the number of space chars skipped */ int xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { int cur, res = 0; do { cur = CUR; while (IS_BLANK(cur)) { NEXT; cur = CUR; res++; } while ((cur == 0) && (ctxt->inputNr > 1) && (ctxt->instate != XML_PARSER_COMMENT)) { xmlPopInput(ctxt); cur = CUR; } if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); } while (IS_BLANK(cur)); return(res); } /************************************************************************ * * * Commodity functions to handle entities processing * * * ************************************************************************/ /** * xmlPopInput: * @ctxt: an XML parser context * * xmlPopInput: the current input pointed by ctxt->input came to an end * pop it and return the next char. * * Returns the current xmlChar in the parser context */ xmlChar xmlPopInput(xmlParserCtxtPtr ctxt) { if (ctxt->inputNr == 1) return(0); /* End of main Input */ xmlFreeInputStream(inputPop(ctxt)); if ((*ctxt->input->cur == 0) && (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) return(xmlPopInput(ctxt)); return(CUR); } /** * xmlPushInput: * @ctxt: an XML parser context * @input: an XML parser input fragment (entity, XML fragment ...). * * xmlPushInput: switch to a new input stream which is stacked on top * of the previous one(s). */ void xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { if (input == NULL) return; inputPush(ctxt, input); GROW; } /** * xmlFreeInputStream: * @input: an xmlParserInputPtr * * Free up an input stream. 
*/
void
xmlFreeInputStream(xmlParserInputPtr input) {
    if (input != NULL) {
        /* Strings attached to the stream go first. */
        if (input->filename != NULL)
            xmlFree((char *) input->filename);
        if (input->directory != NULL)
            xmlFree((char *) input->directory);
        if (input->encoding != NULL)
            xmlFree((char *) input->encoding);
        if (input->version != NULL)
            xmlFree((char *) input->version);

        /* The content is released only when a deallocator was registered. */
        if ((input->free != NULL) && (input->base != NULL))
            input->free((xmlChar *) input->base);
        if (input->buf != NULL)
            xmlFreeParserInputBuffer(input->buf);

        /* Scribble over the structure before handing it back. */
        memset(input, -1, sizeof(xmlParserInput));
        xmlFree(input);
    }
}

/**
 * xmlNewInputStream:
 * @ctxt:  an XML parser context
 *
 * Create a new input stream structure, zero-filled apart from the
 * position (line 1, column 1) and the standalone flag (-1).
 * @ctxt may be NULL; it is only used to report an allocation failure.
 *
 * Returns the new input stream or NULL
 */
xmlParserInputPtr
xmlNewInputStream(xmlParserCtxtPtr ctxt) {
    xmlParserInputPtr stream;

    stream = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
    if (stream != NULL) {
        memset(stream, 0, sizeof(xmlParserInput));
        stream->line = 1;
        stream->col = 1;
        stream->standalone = -1;
        return(stream);
    }

    /* Allocation failed: report it through the context when there is one. */
    if (ctxt != NULL) {
        ctxt->errNo = XML_ERR_NO_MEMORY;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData,
                             "malloc: couldn't allocate a new input stream\n");
        /* set again after the callback has run */
        ctxt->errNo = XML_ERR_NO_MEMORY;
    }
    return(NULL);
}

/**
 * xmlNewIOInputStream:
 * @ctxt:  an XML parser context
 * @input:  an I/O Input
 * @enc:  the charset encoding if known
 *
 * Create a new input stream structure encapsulating the @input into
 * a stream suitable for the parser.
* * Returns the new input stream or NULL */ xmlParserInputPtr xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, xmlCharEncoding enc) { xmlParserInputPtr inputStream; inputStream = xmlNewInputStream(ctxt); if (inputStream == NULL) { return(NULL); } inputStream->filename = NULL; inputStream->buf = input; inputStream->base = inputStream->buf->buffer->content; inputStream->cur = inputStream->buf->buffer->content; if (enc != XML_CHAR_ENCODING_NONE) { xmlSwitchEncoding(ctxt, enc); } return(inputStream); } /** * xmlNewEntityInputStream: * @ctxt: an XML parser context * @entity: an Entity pointer * * Create a new input stream based on an xmlEntityPtr * * Returns the new input stream or NULL */ xmlParserInputPtr xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { xmlParserInputPtr input; if (entity == NULL) { ctxt->errNo = XML_ERR_INTERNAL_ERROR; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "internal: xmlNewEntityInputStream entity = NULL\n"); ctxt->errNo = XML_ERR_INTERNAL_ERROR; return(NULL); } if (entity->content == NULL) { switch (entity->etype) { case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: ctxt->errNo = XML_ERR_UNPARSED_ENTITY; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlNewEntityInputStream unparsed entity !\n"); break; case XML_EXTERNAL_GENERAL_PARSED_ENTITY: case XML_EXTERNAL_PARAMETER_ENTITY: return(xmlLoadExternalEntity((char *) entity->SystemID, (char *) entity->ExternalID, ctxt)); case XML_INTERNAL_GENERAL_ENTITY: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Internal entity %s without content !\n", entity->name); break; case XML_INTERNAL_PARAMETER_ENTITY: ctxt->errNo = XML_ERR_INTERNAL_ERROR; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Internal parameter entity %s without content !\n", entity->name); break; case XML_INTERNAL_PREDEFINED_ENTITY: 
ctxt->errNo = XML_ERR_INTERNAL_ERROR; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Predefined entity %s without content !\n", entity->name); break; } return(NULL); } input = xmlNewInputStream(ctxt); if (input == NULL) { return(NULL); } input->filename = (char *) entity->SystemID; input->base = entity->content; input->cur = entity->content; input->length = entity->length; return(input); } /** * xmlNewStringInputStream: * @ctxt: an XML parser context * @buffer: an memory buffer * * Create a new input stream based on a memory buffer. * Returns the new input stream */ xmlParserInputPtr xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) { xmlParserInputPtr input; if (buffer == NULL) { ctxt->errNo = XML_ERR_INTERNAL_ERROR; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "internal: xmlNewStringInputStream string = NULL\n"); return(NULL); } input = xmlNewInputStream(ctxt); if (input == NULL) { return(NULL); } input->base = buffer; input->cur = buffer; input->length = xmlStrlen(buffer); return(input); } /** * xmlNewInputFromFile: * @ctxt: an XML parser context * @filename: the filename to use as entity * * Create a new input stream based on a file. 
* * Returns the new input stream or NULL in case of error */ xmlParserInputPtr xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { xmlParserInputBufferPtr buf; xmlParserInputPtr inputStream; char *directory = NULL; if (ctxt == NULL) return(NULL); buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); if (buf == NULL) { char name[XML_PARSER_BIG_BUFFER_SIZE]; if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) { #ifdef WIN32 sprintf(name, "%s\\%s", ctxt->input->directory, filename); #else sprintf(name, "%s/%s", ctxt->input->directory, filename); #endif buf = xmlParserInputBufferCreateFilename(name, XML_CHAR_ENCODING_NONE); if (buf != NULL) directory = xmlParserGetDirectory(name); } if ((buf == NULL) && (ctxt->directory != NULL)) { #ifdef WIN32 sprintf(name, "%s\\%s", ctxt->directory, filename); #else sprintf(name, "%s/%s", ctxt->directory, filename); #endif buf = xmlParserInputBufferCreateFilename(name, XML_CHAR_ENCODING_NONE); if (buf != NULL) directory = xmlParserGetDirectory(name); } if (buf == NULL) return(NULL); } if (directory == NULL) directory = xmlParserGetDirectory(filename); inputStream = xmlNewInputStream(ctxt); if (inputStream == NULL) { if (directory != NULL) xmlFree((char *) directory); return(NULL); } inputStream->filename = xmlMemStrdup(filename); inputStream->directory = directory; inputStream->buf = buf; inputStream->base = inputStream->buf->buffer->content; inputStream->cur = inputStream->buf->buffer->content; if ((ctxt->directory == NULL) && (directory != NULL)) ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory); return(inputStream); } /************************************************************************ * * * Commodity functions to handle parser contexts * * * ************************************************************************/ /** * xmlInitParserCtxt: * @ctxt: an XML parser context * * Initialize a parser context */ void xmlInitParserCtxt(xmlParserCtxtPtr ctxt) { 
xmlSAXHandler *sax; xmlDefaultSAXHandlerInit(); sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); if (sax == NULL) { fprintf(stderr, "xmlInitParserCtxt: out of memory\n"); } memset(sax, 0, sizeof(xmlSAXHandler)); /* Allocate the Input stack */ ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr)); ctxt->inputNr = 0; ctxt->inputMax = 5; ctxt->input = NULL; ctxt->version = NULL; ctxt->encoding = NULL; ctxt->standalone = -1; ctxt->hasExternalSubset = 0; ctxt->hasPErefs = 0; ctxt->html = 0; ctxt->external = 0; ctxt->instate = XML_PARSER_START; ctxt->token = 0; ctxt->directory = NULL; /* Allocate the Node stack */ ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr)); ctxt->nodeNr = 0; ctxt->nodeMax = 10; ctxt->node = NULL; /* Allocate the Name stack */ ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); ctxt->nameNr = 0; ctxt->nameMax = 10; ctxt->name = NULL; /* Allocate the space stack */ ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int)); ctxt->spaceNr = 1; ctxt->spaceMax = 10; ctxt->spaceTab[0] = -1; ctxt->space = &ctxt->spaceTab[0]; if (sax == NULL) { ctxt->sax = &xmlDefaultSAXHandler; } else { ctxt->sax = sax; memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler)); } ctxt->userData = ctxt; ctxt->myDoc = NULL; ctxt->wellFormed = 1; ctxt->valid = 1; ctxt->validate = xmlDoValidityCheckingDefaultValue; ctxt->keepBlanks = xmlKeepBlanksDefaultValue; ctxt->vctxt.userData = ctxt; if (ctxt->validate) { ctxt->vctxt.error = xmlParserValidityError; if (xmlGetWarningsDefaultValue == 0) ctxt->vctxt.warning = NULL; else ctxt->vctxt.warning = xmlParserValidityWarning; /* Allocate the Node stack */ ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr)); ctxt->vctxt.nodeNr = 0; ctxt->vctxt.nodeMax = 4; ctxt->vctxt.node = NULL; } else { ctxt->vctxt.error = NULL; ctxt->vctxt.warning = NULL; } ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue; ctxt->record_info = 0; ctxt->nbChars = 0; 
ctxt->checkIndex = 0; ctxt->inSubset = 0; ctxt->errNo = XML_ERR_OK; ctxt->depth = 0; ctxt->charset = XML_CHAR_ENCODING_UTF8; xmlInitNodeInfoSeq(&ctxt->node_seq); } /** * xmlFreeParserCtxt: * @ctxt: an XML parser context * * Free all the memory used by a parser context. However the parsed * document in ctxt->myDoc is not freed. */ void xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) { xmlParserInputPtr input; xmlChar *oldname; if (ctxt == NULL) return; while ((input = inputPop(ctxt)) != NULL) { xmlFreeInputStream(input); } while ((oldname = namePop(ctxt)) != NULL) { xmlFree(oldname); } if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab); if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab); if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab); if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab); if (ctxt->version != NULL) xmlFree((char *) ctxt->version); if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding); if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName); if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI); if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem); if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab); if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler)) xmlFree(ctxt->sax); if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory); xmlFree(ctxt); } /** * xmlNewParserCtxt: * * Allocate and initialize a new parser context. 
* * Returns the xmlParserCtxtPtr or NULL */ xmlParserCtxtPtr xmlNewParserCtxt() { xmlParserCtxtPtr ctxt; ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt)); if (ctxt == NULL) { fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n"); perror("malloc"); return(NULL); } memset(ctxt, 0, sizeof(xmlParserCtxt)); xmlInitParserCtxt(ctxt); return(ctxt); } /** * xmlClearParserCtxt: * @ctxt: an XML parser context * * Clear (release owned resources) and reinitialize a parser context */ void xmlClearParserCtxt(xmlParserCtxtPtr ctxt) { xmlClearNodeInfoSeq(&ctxt->node_seq); xmlInitParserCtxt(ctxt); } /************************************************************************ * * * Commodity functions to handle entities * * * ************************************************************************/ /** * xmlCheckEntity: * @ctxt: an XML parser context * @content: the entity content string * * Parse an entity content and checks the WF constraints * */ void xmlCheckEntity(xmlParserCtxtPtr ctxt, const xmlChar *content) { } /** * xmlParseCharRef: * @ctxt: an XML parser context * * parse Reference declarations * * [66] CharRef ::= '&#' [0-9]+ ';' | * '&#x' [0-9a-fA-F]+ ';' * * [ WFC: Legal Character ] * Characters referred to using character references must match the * production for Char. 
* * Returns the value parsed (as an int), 0 in case of error */ int xmlParseCharRef(xmlParserCtxtPtr ctxt) { int val = 0; if (ctxt->token != 0) { val = ctxt->token; ctxt->token = 0; return(val); } if ((RAW == '&') && (NXT(1) == '#') && (NXT(2) == 'x')) { SKIP(3); while (RAW != ';') { if ((RAW >= '0') && (RAW <= '9')) val = val * 16 + (CUR - '0'); else if ((RAW >= 'a') && (RAW <= 'f')) val = val * 16 + (CUR - 'a') + 10; else if ((RAW >= 'A') && (RAW <= 'F')) val = val * 16 + (CUR - 'A') + 10; else { ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlParseCharRef: invalid hexadecimal value\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; val = 0; break; } NEXT; } if (RAW == ';') { /* on purpose to avoid reentrancy problems with NEXT and SKIP */ ctxt->nbChars ++; ctxt->input->cur++; } } else if ((RAW == '&') && (NXT(1) == '#')) { SKIP(2); while (RAW != ';') { if ((RAW >= '0') && (RAW <= '9')) val = val * 10 + (CUR - '0'); else { ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlParseCharRef: invalid decimal value\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; val = 0; break; } NEXT; } if (RAW == ';') { /* on purpose to avoid reentrancy problems with NEXT and SKIP */ ctxt->nbChars ++; ctxt->input->cur++; } } else { ctxt->errNo = XML_ERR_INVALID_CHARREF; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlParseCharRef: invalid value\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; } /* * [ WFC: Legal Character ] * Characters referred to using character references must match the * production for Char. 
*/ if (IS_CHAR(val)) { return(val); } else { ctxt->errNo = XML_ERR_INVALID_CHAR; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n", val); ctxt->wellFormed = 0; ctxt->disableSAX = 1; } return(0); } /** * xmlParseStringCharRef: * @ctxt: an XML parser context * @str: a pointer to an index in the string * * parse Reference declarations, variant parsing from a string rather * than an an input flow. * * [66] CharRef ::= '&#' [0-9]+ ';' | * '&#x' [0-9a-fA-F]+ ';' * * [ WFC: Legal Character ] * Characters referred to using character references must match the * production for Char. * * Returns the value parsed (as an int), 0 in case of error, str will be * updated to the current value of the index */ int xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { const xmlChar *ptr; xmlChar cur; int val = 0; if ((str == NULL) || (*str == NULL)) return(0); ptr = *str; cur = *ptr; if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { ptr += 3; cur = *ptr; while (cur != ';') { if ((cur >= '0') && (cur <= '9')) val = val * 16 + (cur - '0'); else if ((cur >= 'a') && (cur <= 'f')) val = val * 16 + (cur - 'a') + 10; else if ((cur >= 'A') && (cur <= 'F')) val = val * 16 + (cur - 'A') + 10; else { ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlParseStringCharRef: invalid hexadecimal value\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; val = 0; break; } ptr++; cur = *ptr; } if (cur == ';') ptr++; } else if ((cur == '&') && (ptr[1] == '#')){ ptr += 2; cur = *ptr; while (cur != ';') { if ((cur >= '0') && (cur <= '9')) val = val * 10 + (cur - '0'); else { ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlParseStringCharRef: invalid decimal value\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; val = 0; break; } ptr++; cur = 
*ptr; } if (cur == ';') ptr++; } else { ctxt->errNo = XML_ERR_INVALID_CHARREF; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlParseCharRef: invalid value\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; return(0); } *str = ptr; /* * [ WFC: Legal Character ] * Characters referred to using character references must match the * production for Char. */ if (IS_CHAR(val)) { return(val); } else { ctxt->errNo = XML_ERR_INVALID_CHAR; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n", val); ctxt->wellFormed = 0; ctxt->disableSAX = 1; } return(0); } /** * xmlParserHandleReference: * @ctxt: the parser context * * [67] Reference ::= EntityRef | CharRef * * [68] EntityRef ::= '&' Name ';' * * [ WFC: Entity Declared ] * the Name given in the entity reference must match that in an entity * declaration, except that well-formed documents need not declare any * of the following entities: amp, lt, gt, apos, quot. 
*
 * [ WFC: Parsed Entity ]
 * An entity reference must not contain the name of an unparsed entity
 *
 * [66] CharRef ::= '&#' [0-9]+ ';' |
 *                  '&#x' [0-9a-fA-F]+ ';'
 *
 * A reference may have been detected in the current input stream;
 * the handling is done according to
 *      http://www.w3.org/TR/REC-xml#entproc
 *
 * Depending on ctxt->instate the reference is left alone, reported as an
 * error, turned into a pending character in ctxt->token (character
 * references, predefined entities) or replaced by pushing a new entity
 * input stream.
 */
void
xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
    xmlParserInputPtr input;
    xmlChar *name;
    xmlEntityPtr ent = NULL;

    /* nothing is decoded while a token is already pending */
    if (ctxt->token != 0) {
        return;
    }
    if (RAW != '&') return;
    GROW;

    /*
     * Character reference: "&#..."
     */
    if ((RAW == '&') && (NXT(1) == '#')) {
        switch(ctxt->instate) {
            case XML_PARSER_ENTITY_DECL:
            case XML_PARSER_PI:
            case XML_PARSER_CDATA_SECTION:
            case XML_PARSER_COMMENT:
            case XML_PARSER_SYSTEM_LITERAL:
                /* we just ignore it there */
                return;
            case XML_PARSER_START_TAG:
                return;
            case XML_PARSER_END_TAG:
                return;
            case XML_PARSER_EOF:
                ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                    ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
                ctxt->wellFormed = 0;
                ctxt->disableSAX = 1;
                return;
            case XML_PARSER_PROLOG:
            case XML_PARSER_START:
            case XML_PARSER_MISC:
                ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                    ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
                ctxt->wellFormed = 0;
                ctxt->disableSAX = 1;
                return;
            case XML_PARSER_EPILOG:
                ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                    ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
                ctxt->wellFormed = 0;
                ctxt->disableSAX = 1;
                return;
            case XML_PARSER_DTD:
                ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                    ctxt->sax->error(ctxt->userData,
                           "CharRef are forbiden in DTDs!\n");
                ctxt->wellFormed = 0;
                ctxt->disableSAX = 1;
                return;
            case XML_PARSER_ENTITY_VALUE:
                /*
                 * NOTE: in the case of entity values, we don't do the
                 *       substitution here since we need the literal
                 *       entity value to be able to save the internal
                 *       subset of the document.
                 *       This will be handled by xmlDecodeEntities
                 */
                return;
            case XML_PARSER_CONTENT:
            case XML_PARSER_ATTRIBUTE_VALUE:
                /* the decoded character becomes the pending token */
                ctxt->token = xmlParseCharRef(ctxt);
                return;
        }
        return;
    }

    /*
     * Entity reference: "&Name;"
     * Every state named in this switch returns, so the code after it only
     * runs for a state that is not listed here.
     */
    switch(ctxt->instate) {
        case XML_PARSER_CDATA_SECTION:
            return;
        case XML_PARSER_PI:
        case XML_PARSER_COMMENT:
        case XML_PARSER_SYSTEM_LITERAL:
        case XML_PARSER_CONTENT:
            return;
        case XML_PARSER_START_TAG:
            return;
        case XML_PARSER_END_TAG:
            return;
        case XML_PARSER_EOF:
            ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
            ctxt->wellFormed = 0;
            ctxt->disableSAX = 1;
            return;
        case XML_PARSER_PROLOG:
        case XML_PARSER_START:
        case XML_PARSER_MISC:
            ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
            ctxt->wellFormed = 0;
            ctxt->disableSAX = 1;
            return;
        case XML_PARSER_EPILOG:
            ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
            ctxt->wellFormed = 0;
            ctxt->disableSAX = 1;
            return;
        case XML_PARSER_ENTITY_VALUE:
            /*
             * NOTE: in the case of entity values, we don't do the
             *       substitution here since we need the literal
             *       entity value to be able to save the internal
             *       subset of the document.
             *       This will be handled by xmlDecodeEntities
             */
            return;
        case XML_PARSER_ATTRIBUTE_VALUE:
            /*
             * NOTE: in the case of attributes values, we don't do the
             *       substitution here unless we are in a mode where
             *       the parser is explicitly asked to substitute
             *       entities. The SAX callback is called with values
             *       without entity substitution.
             *       This will then be handled by xmlDecodeEntities
             */
            return;
        case XML_PARSER_ENTITY_DECL:
            /*
             * we just ignore it there
             * the substitution will be done once the entity is referenced
             */
            return;
        case XML_PARSER_DTD:
            ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
                       "Entity references are forbiden in DTDs!\n");
            ctxt->wellFormed = 0;
            ctxt->disableSAX = 1;
            return;
    }

    /* skip the '&', then look at the name without consuming it yet */
    NEXT;
    name = xmlScanName(ctxt);
    if (name == NULL) {
        ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
        ctxt->wellFormed = 0;
        ctxt->disableSAX = 1;
        /* the '&' already skipped is handed back as the pending token */
        ctxt->token = '&';
        return;
    }
    if (NXT(xmlStrlen(name)) != ';') {
        ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData,
                             "Entity reference: ';' expected\n");
        ctxt->wellFormed = 0;
        ctxt->disableSAX = 1;
        ctxt->token = '&';
        xmlFree(name);
        return;
    }
    /* consume the name and the trailing ';' */
    SKIP(xmlStrlen(name) + 1);
    if (ctxt->sax != NULL) {
        if (ctxt->sax->getEntity != NULL)
            ent = ctxt->sax->getEntity(ctxt->userData, name);
    }

    /*
     * [ WFC: Entity Declared ]
     * the Name given in the entity reference must match that in an entity
     * declaration, except that well-formed documents need not declare any
     * of the following entities: amp, lt, gt, apos, quot.
     */
    if (ent == NULL)
        ent = xmlGetPredefinedEntity(name);
    if (ent == NULL) {
        ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData,
                             "Entity reference: entity %s not declared\n",
                             name);
        ctxt->wellFormed = 0;
        ctxt->disableSAX = 1;
        xmlFree(name);
        return;
    }

    /*
     * [ WFC: Parsed Entity ]
     * An entity reference must not contain the name of an unparsed entity
     * NOTE(review): after reporting the error the code does not return and
     * still goes on to push an input stream for the entity - confirm that
     * this is intended.
     */
    if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
        ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData,
                         "Entity reference to unparsed entity %s\n", name);
        ctxt->wellFormed = 0;
        ctxt->disableSAX = 1;
    }

    /* predefined entities: the first content char becomes the token */
    if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
        ctxt->token = ent->content[0];
        xmlFree(name);
        return;
    }
    /* any other entity is replaced by switching to a new input stream */
    input = xmlNewEntityInputStream(ctxt, ent);
    xmlPushInput(ctxt, input);
    xmlFree(name);
    return;
}

/**
 * xmlParserHandlePEReference:
 * @ctxt:  the parser context
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * [ WFC: No Recursion ]
 * TODO A parsed entity must not contain a recursive
 * reference to itself, either directly or indirectly.
 *
 * [ WFC: Entity Declared ]
 * In a document without any DTD, a document with only an internal DTD
 * subset which contains no parameter entity references, or a document
 * with "standalone='yes'", ...  ... The declaration of a parameter
 * entity must precede any reference to it...
 *
 * [ VC: Entity Declared ]
 * In a document with an external subset or external parameter entities
 * with "standalone='no'", ...  ... The declaration of a parameter entity
 * must precede any reference to it...
 *
 * [ WFC: In DTD ]
 * Parameter-entity references may only appear in the DTD.
 * NOTE: misleading but this is handled.
 *
 * A PEReference may have been detected in the current input stream
 * the handling is done accordingly to
 *      http://www.w3.org/TR/REC-xml#entproc
 * i.e.
* - Included in literal in entity values * - Included as Paraemeter Entity reference within DTDs */ void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { xmlChar *name; xmlEntityPtr entity = NULL; xmlParserInputPtr input; if (ctxt->token != 0) { return; } if (RAW != '%') return; switch(ctxt->instate) { case XML_PARSER_CDATA_SECTION: return; case XML_PARSER_COMMENT: return; case XML_PARSER_START_TAG: return; case XML_PARSER_END_TAG: return; case XML_PARSER_EOF: ctxt->errNo = XML_ERR_PEREF_AT_EOF; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "PEReference at EOF\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; return; case XML_PARSER_PROLOG: case XML_PARSER_START: case XML_PARSER_MISC: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; return; case XML_PARSER_ENTITY_DECL: case XML_PARSER_CONTENT: case XML_PARSER_ATTRIBUTE_VALUE: case XML_PARSER_PI: case XML_PARSER_SYSTEM_LITERAL: /* we just ignore it there */ return; case XML_PARSER_EPILOG: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; return; case XML_PARSER_ENTITY_VALUE: /* * NOTE: in the case of entity values, we don't do the * substitution here since we need the literal * entity value to be able to save the internal * subset of the document. * This will be handled by xmlDecodeEntities */ return; case XML_PARSER_DTD: /* * [WFC: Well-Formedness Constraint: PEs in Internal Subset] * In the internal DTD subset, parameter-entity references * can occur only where markup declarations can occur, not * within markup declarations. 
* In that case this is handled in xmlParseMarkupDecl */ if ((ctxt->external == 0) && (ctxt->inputNr == 1)) return; } NEXT; name = xmlParseName(ctxt); if (name == NULL) { ctxt->errNo = XML_ERR_PEREF_NO_NAME; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; } else { if (RAW == ';') { NEXT; if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) entity = ctxt->sax->getParameterEntity(ctxt->userData, name); if (entity == NULL) { /* * [ WFC: Entity Declared ] * In a document without any DTD, a document with only an * internal DTD subset which contains no parameter entity * references, or a document with "standalone='yes'", ... * ... The declaration of a parameter entity must precede * any reference to it... */ if ((ctxt->standalone == 1) || ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "PEReference: %%%s; not found\n", name); ctxt->wellFormed = 0; ctxt->disableSAX = 1; } else { /* * [ VC: Entity Declared ] * In a document with an external subset or external * parameter entities with "standalone='no'", ... * ... The declaration of a parameter entity must precede * any reference to it... */ if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) ctxt->sax->warning(ctxt->userData, "PEReference: %%%s; not found\n", name); ctxt->valid = 0; } } else { if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { /* * TODO !!! handle the extra spaces added before and after * c.f. 
http://www.w3.org/TR/REC-xml#as-PE */ input = xmlNewEntityInputStream(ctxt, entity); xmlPushInput(ctxt, input); if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && (RAW == '<') && (NXT(1) == '?') && (NXT(2) == 'x') && (NXT(3) == 'm') && (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { xmlParseTextDecl(ctxt); } if (ctxt->token == 0) ctxt->token = ' '; } else { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: %s is not a parameter entity\n", name); ctxt->wellFormed = 0; ctxt->disableSAX = 1; } } } else { ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: expecting ';'\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; } xmlFree(name); } } /* * Macro used to grow the current buffer. */ #define growBuffer(buffer) { \ buffer##_size *= 2; \ buffer = (xmlChar *) \ xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ if (buffer == NULL) { \ perror("realloc failed"); \ return(NULL); \ } \ } /** * xmlDecodeEntities: * @ctxt: the parser context * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF * @len: the len to decode (in bytes !), -1 for no size limit * @end: an end marker xmlChar, 0 if none * @end2: an end marker xmlChar, 0 if none * @end3: an end marker xmlChar, 0 if none * * [67] Reference ::= EntityRef | CharRef * * [69] PEReference ::= '%' Name ';' * * Returns A newly allocated string with the substitution done. The caller * must deallocate it ! 
*/ xmlChar * xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what, xmlChar end, xmlChar end2, xmlChar end3) { xmlChar *buffer = NULL; unsigned int buffer_size = 0; unsigned int nbchars = 0; xmlChar *current = NULL; xmlEntityPtr ent; unsigned int max = (unsigned int) len; int c,l; if (ctxt->depth > 40) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Detected entity reference loop\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; ctxt->errNo = XML_ERR_ENTITY_LOOP; return(NULL); } /* * allocate a translation buffer. */ buffer_size = XML_PARSER_BIG_BUFFER_SIZE; buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); if (buffer == NULL) { perror("xmlDecodeEntities: malloc failed"); return(NULL); } /* * Ok loop until we reach one of the ending char or a size limit. */ c = CUR_CHAR(l); while ((nbchars < max) && (c != end) && (c != end2) && (c != end3)) { if (c == 0) break; if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) { int val = xmlParseCharRef(ctxt); COPY_BUF(0,buffer,nbchars,val); NEXTL(l); } else if ((c == '&') && (ctxt->token != '&') && (what & XML_SUBSTITUTE_REF)) { ent = xmlParseEntityRef(ctxt); if ((ent != NULL) && (ctxt->replaceEntities != 0)) { current = ent->content; while (*current != 0) { buffer[nbchars++] = *current++; if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { growBuffer(buffer); } } } else if (ent != NULL) { const xmlChar *cur = ent->name; buffer[nbchars++] = '&'; if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { growBuffer(buffer); } while (*cur != 0) { buffer[nbchars++] = *cur++; } buffer[nbchars++] = ';'; } } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { /* * a PEReference induce to switch the entity flow, * we break here to flush the current set of chars * parsed if any. We will be called back later. */ if (nbchars != 0) break; xmlParsePEReference(ctxt); /* * Pop-up of finished entities. 
*/ while ((RAW == 0) && (ctxt->inputNr > 1)) xmlPopInput(ctxt); break; } else { COPY_BUF(l,buffer,nbchars,c); NEXTL(l); if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { growBuffer(buffer); } } c = CUR_CHAR(l); } buffer[nbchars++] = 0; return(buffer); } /** * xmlStringDecodeEntities: * @ctxt: the parser context * @str: the input string * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF * @end: an end marker xmlChar, 0 if none * @end2: an end marker xmlChar, 0 if none * @end3: an end marker xmlChar, 0 if none * * [67] Reference ::= EntityRef | CharRef * * [69] PEReference ::= '%' Name ';' * * Returns A newly allocated string with the substitution done. The caller * must deallocate it ! */ xmlChar * xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, xmlChar end, xmlChar end2, xmlChar end3) { xmlChar *buffer = NULL; int buffer_size = 0; xmlChar *current = NULL; xmlEntityPtr ent; int c,l; int nbchars = 0; if (ctxt->depth > 40) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Detected entity reference loop\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; ctxt->errNo = XML_ERR_ENTITY_LOOP; return(NULL); } /* * allocate a translation buffer. */ buffer_size = XML_PARSER_BIG_BUFFER_SIZE; buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); if (buffer == NULL) { perror("xmlDecodeEntities: malloc failed"); return(NULL); } /* * Ok loop until we reach one of the ending char or a size limit. 
*/ c = CUR_SCHAR(str, l); while ((c != 0) && (c != end) && (c != end2) && (c != end3)) { if (c == 0) break; if ((c == '&') && (str[1] == '#')) { int val = xmlParseStringCharRef(ctxt, &str); if (val != 0) { COPY_BUF(0,buffer,nbchars,val); } } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { ent = xmlParseStringEntityRef(ctxt, &str); if ((ent != NULL) && (ent->content != NULL)) { xmlChar *rep; ctxt->depth++; rep = xmlStringDecodeEntities(ctxt, ent->content, what, 0, 0, 0); ctxt->depth--; if (rep != NULL) { current = rep; while (*current != 0) { buffer[nbchars++] = *current++; if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { growBuffer(buffer); } } xmlFree(rep); } } else if (ent != NULL) { int i = xmlStrlen(ent->name); const xmlChar *cur = ent->name; buffer[nbchars++] = '&'; if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { growBuffer(buffer); } for (;i > 0;i--) buffer[nbchars++] = *cur++; buffer[nbchars++] = ';'; } } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { ent = xmlParseStringPEReference(ctxt, &str); if (ent != NULL) { xmlChar *rep; ctxt->depth++; rep = xmlStringDecodeEntities(ctxt, ent->content, what, 0, 0, 0); ctxt->depth--; if (rep != NULL) { current = rep; while (*current != 0) { buffer[nbchars++] = *current++; if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { growBuffer(buffer); } } xmlFree(rep); } } } else { COPY_BUF(l,buffer,nbchars,c); str += l; if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { growBuffer(buffer); } } c = CUR_SCHAR(str, l); } buffer[nbchars++] = 0; return(buffer); } /************************************************************************ * * * Commodity functions to handle encodings * * * ************************************************************************/ /* * xmlCheckLanguageID * @lang: pointer to the string value * * Checks that the value conforms to the LanguageID production: * * [33] LanguageID ::= Langcode ('-' Subcode)* * [34] Langcode ::= ISO639Code | IanaCode | UserCode * [35] ISO639Code 
::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * The '+' in [36] and [37] is enforced: a bare "i-" or "x-" prefix with
 * no letter behind it is rejected.
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar *lang) {
/* ASCII letter test shared by all the productions above */
#define XML_LANGID_IS_ALPHA(c)						\
    ((((c) >= 'A') && ((c) <= 'Z')) || (((c) >= 'a') && ((c) <= 'z')))

    const xmlChar *cur = lang;

    if (cur == NULL)
        return(0);
    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * IANA code or User code: prefix followed by one or more letters
         */
        cur += 2;
        if (!XML_LANGID_IS_ALPHA(cur[0]))
            return(0);
        while (XML_LANGID_IS_ALPHA(cur[0]))
            cur++;
    } else if (XML_LANGID_IS_ALPHA(cur[0])) {
        /*
         * ISO639: exactly two letters
         */
        cur++;
        if (XML_LANGID_IS_ALPHA(cur[0]))
            cur++;
        else
            return(0);
    } else
        return(0);

    /* any number of '-' Subcode, each with at least one letter */
    while (cur[0] != 0) {
        if (cur[0] != '-')
            return(0);
        cur++;
        if (XML_LANGID_IS_ALPHA(cur[0]))
            cur++;
        else
            return(0);
        while (XML_LANGID_IS_ALPHA(cur[0]))
            cur++;
    }
    return(1);

#undef XML_LANGID_IS_ALPHA
}

/**
 * xmlSwitchEncoding:
 * @ctxt:  the parser context
 * @enc:  the encoding value (number)
 *
 * change the input functions when discovering the character encoding
 * of a given entity.
*
 * Returns 0 in case of success, -1 otherwise
 */
int
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
{
    xmlCharEncodingHandlerPtr handler;

    /* cases that need no conversion handler at all */
    switch (enc) {
        case XML_CHAR_ENCODING_ERROR:
            ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData, "encoding unknown\n");
            ctxt->wellFormed = 0;
            ctxt->disableSAX = 1;
            break;
        case XML_CHAR_ENCODING_NONE:
            /* let's assume it's UTF-8 without the XML decl */
            ctxt->charset = XML_CHAR_ENCODING_UTF8;
            return(0);
        case XML_CHAR_ENCODING_UTF8:
            /* default encoding, no conversion should be needed */
            ctxt->charset = XML_CHAR_ENCODING_UTF8;
            return(0);
        default:
            break;
    }
    handler = xmlGetCharEncodingHandler(enc);
    if (handler == NULL) {
        /*
         * Default handlers.
         * No registered handler: either the encoding is kept as is
         * (ISO-8859-x), or an error is reported and -1 returned below.
         */
        switch (enc) {
            case XML_CHAR_ENCODING_ERROR:
                ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                    ctxt->sax->error(ctxt->userData, "encoding unknown\n");
                ctxt->wellFormed = 0;
                ctxt->disableSAX = 1;
                ctxt->charset = XML_CHAR_ENCODING_UTF8;
                break;
            case XML_CHAR_ENCODING_NONE:
                /* let's assume it's UTF-8 without the XML decl */
                ctxt->charset = XML_CHAR_ENCODING_UTF8;
                return(0);
            case XML_CHAR_ENCODING_UTF8:
                /* default encoding, no conversion should be needed */
                ctxt->charset = XML_CHAR_ENCODING_UTF8;
                return(0);
            case XML_CHAR_ENCODING_UTF16LE:
                break;
            case XML_CHAR_ENCODING_UTF16BE:
                break;
            case XML_CHAR_ENCODING_UCS4LE:
                ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                    ctxt->sax->error(ctxt->userData,
                      "char encoding USC4 little endian not supported\n");
                break;
            case XML_CHAR_ENCODING_UCS4BE:
                ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                    ctxt->sax->error(ctxt->userData,
                      "char encoding USC4 big endian not supported\n");
                break;
            case XML_CHAR_ENCODING_EBCDIC:
                ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                    ctxt->sax->error(ctxt->userData,
                      "char encoding EBCDIC not supported\n");
                break;
            case XML_CHAR_ENCODING_UCS4_2143:
                ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                    ctxt->sax->error(ctxt->userData,
                      "char encoding UCS4 2143 not supported\n");
                break;
            case XML_CHAR_ENCODING_UCS4_3412:
                ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                    ctxt->sax->error(ctxt->userData,
                      "char encoding UCS4 3412 not supported\n");
                break;
            case XML_CHAR_ENCODING_UCS2:
                ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                    ctxt->sax->error(ctxt->userData,
                      "char encoding UCS2 not supported\n");
                break;
            case XML_CHAR_ENCODING_8859_1:
            case XML_CHAR_ENCODING_8859_2:
            case XML_CHAR_ENCODING_8859_3:
            case XML_CHAR_ENCODING_8859_4:
            case XML_CHAR_ENCODING_8859_5:
            case XML_CHAR_ENCODING_8859_6:
            case XML_CHAR_ENCODING_8859_7:
            case XML_CHAR_ENCODING_8859_8:
            case XML_CHAR_ENCODING_8859_9:
                /*
                 * Keep the internal content in the document encoding
                 */
                if ((ctxt->inputNr == 1) &&
                    (ctxt->encoding == NULL) &&
                    (ctxt->input->encoding != NULL)) {
                    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
                }
                ctxt->charset = enc;
                return(0);
            case XML_CHAR_ENCODING_2022_JP:
                ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                    ctxt->sax->error(ctxt->userData,
                      "char encoding ISO-2022-JPnot supported\n");
                break;
            case XML_CHAR_ENCODING_SHIFT_JIS:
                ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                    ctxt->sax->error(ctxt->userData,
                      "char encoding Shift_JIS not supported\n");
                break;
            case XML_CHAR_ENCODING_EUC_JP:
                ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                    ctxt->sax->error(ctxt->userData,
                      "char encoding EUC-JPnot supported\n");
                break;
        }
    }
    if (handler == NULL)
        return(-1);
    /* with a conversion handler the parser works on UTF-8 */
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
    return(xmlSwitchToEncoding(ctxt, handler));
}

/**
 * xmlSwitchToEncoding:
 * @ctxt:  the parser context
 * @handler:  the encoding handler
 *
 * change the input functions when discovering the character encoding
 * of a given entity.
 *
 * The handler is installed on the buffer of the current input. If that
 * buffer already holds data and had no encoder yet, the unread part is
 * moved to the raw buffer and its beginning is converted right away.
 * An input without a buffer cannot be converted and is reported as an
 * error (this path used to dereference the missing buffer).
 * ctxt->charset is not modified here; xmlSwitchEncoding() sets it before
 * delegating to this function.
 *
 * Returns 0 in case of success, -1 otherwise
 */
int
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
{
    int nbchars;

    if (handler == NULL)
        return(-1);

    if (ctxt->input == NULL) {
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData,
                             "xmlSwitchEncoding : no input\n");
        return(-1);
    }

    if (ctxt->input->buf == NULL) {
        /*
         * When parsing a static memory array one must know the
         * size to be able to convert the buffer.
         */
        if (ctxt->input->length == 0) {
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
                                 "xmlSwitchEncoding : no input\n");
            return(-1);
        }
        /*
         * There is no parser input buffer to hold the encoder and the
         * raw/converted data, so the conversion cannot take place.
         */
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData,
                             "xmlSwitchToEncoding : no input buffer\n");
        return(-1);
    }

    if (ctxt->input->buf->encoder != NULL) {
        if (ctxt->input->buf->encoder == handler)
            return(0);
        /*
         * Note: this is a bit dangerous, but that's what it
         * takes to use nearly compatible signature for different
         * encodings.
         */
        xmlCharEncCloseFunc(ctxt->input->buf->encoder);
        ctxt->input->buf->encoder = handler;
        return(0);
    }
    ctxt->input->buf->encoder = handler;

    /*
     * Is there already some content down the pipe to convert ?
     */
    if ((ctxt->input->buf->buffer != NULL) &&
        (ctxt->input->buf->buffer->use > 0)) {
        int processed;

        /*
         * Specific handling of the Byte Order Mark for
         * UTF-16
         */
        if ((handler->name != NULL) &&
            (!strcmp(handler->name, "UTF-16LE")) &&
            (ctxt->input->cur[0] == 0xFF) &&
            (ctxt->input->cur[1] == 0xFE)) {
            ctxt->input->cur += 2;
        }
        if ((handler->name != NULL) &&
            (!strcmp(handler->name, "UTF-16BE")) &&
            (ctxt->input->cur[0] == 0xFE) &&
            (ctxt->input->cur[1] == 0xFF)) {
            ctxt->input->cur += 2;
        }

        /*
         * Shrink the current input buffer.
         * Move it as the raw buffer and create a new input buffer
         */
        processed = ctxt->input->cur - ctxt->input->base;
        xmlBufferShrink(ctxt->input->buf->buffer, processed);
        ctxt->input->buf->raw = ctxt->input->buf->buffer;
        ctxt->input->buf->buffer = xmlBufferCreate();

        /*
         * convert just enough to get the first line (presumably the
         * XML declaration) parsed with the autodetected encoding
         * into the parser reading buffer.
         */
        nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
                                      ctxt->input->buf->buffer,
                                      ctxt->input->buf->raw);
        if (nbchars < 0) {
            fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
            return(-1);
        }
        ctxt->input->base =
        ctxt->input->cur = ctxt->input->buf->buffer->content;
    }
    return(0);
}

/************************************************************************
 *									*
 *		Commodity functions to handle xmlChars			*
 *									*
 ************************************************************************/

/**
 * xmlStrndup:
 * @cur:  the input xmlChar *
 * @len:  the len of @cur
 *
 * a strndup for
array of xmlChar's * * Returns a new xmlChar * or NULL */ xmlChar * xmlStrndup(const xmlChar *cur, int len) { xmlChar *ret; if ((cur == NULL) || (len < 0)) return(NULL); ret = xmlMalloc((len + 1) * sizeof(xmlChar)); if (ret == NULL) { fprintf(stderr, "malloc of %ld byte failed\n", (len + 1) * (long)sizeof(xmlChar)); return(NULL); } memcpy(ret, cur, len * sizeof(xmlChar)); ret[len] = 0; return(ret); } /** * xmlStrdup: * @cur: the input xmlChar * * * a strdup for array of xmlChar's. Since they are supposed to be * encoded in UTF-8 or an encoding with 8bit based chars, we assume * a termination mark of '0'. * * Returns a new xmlChar * or NULL */ xmlChar * xmlStrdup(const xmlChar *cur) { const xmlChar *p = cur; if (cur == NULL) return(NULL); while (*p != 0) p++; return(xmlStrndup(cur, p - cur)); } /** * xmlCharStrndup: * @cur: the input char * * @len: the len of @cur * * a strndup for char's to xmlChar's * * Returns a new xmlChar * or NULL */ xmlChar * xmlCharStrndup(const char *cur, int len) { int i; xmlChar *ret; if ((cur == NULL) || (len < 0)) return(NULL); ret = xmlMalloc((len + 1) * sizeof(xmlChar)); if (ret == NULL) { fprintf(stderr, "malloc of %ld byte failed\n", (len + 1) * (long)sizeof(xmlChar)); return(NULL); } for (i = 0;i < len;i++) ret[i] = (xmlChar) cur[i]; ret[len] = 0; return(ret); } /** * xmlCharStrdup: * @cur: the input char * * @len: the len of @cur * * a strdup for char's to xmlChar's * * Returns a new xmlChar * or NULL */ xmlChar * xmlCharStrdup(const char *cur) { const char *p = cur; if (cur == NULL) return(NULL); while (*p != '\0') p++; return(xmlCharStrndup(cur, p - cur)); } /** * xmlStrcmp: * @str1: the first xmlChar * * @str2: the second xmlChar * * * a strcmp for xmlChar's * * Returns the integer result of the comparison */ int xmlStrcmp(const xmlChar *str1, const xmlChar *str2) { register int tmp; if ((str1 == NULL) && (str2 == NULL)) return(0); if (str1 == NULL) return(-1); if (str2 == NULL) return(1); do { tmp = *str1++ - *str2++; if (tmp 
!= 0) return(tmp); } while ((*str1 != 0) && (*str2 != 0)); return (*str1 - *str2); } /** * xmlStrncmp: * @str1: the first xmlChar * * @str2: the second xmlChar * * @len: the max comparison length * * a strncmp for xmlChar's * * Returns the integer result of the comparison */ int xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) { register int tmp; if (len <= 0) return(0); if ((str1 == NULL) && (str2 == NULL)) return(0); if (str1 == NULL) return(-1); if (str2 == NULL) return(1); do { tmp = *str1++ - *str2++; if (tmp != 0) return(tmp); len--; if (len <= 0) return(0); } while ((*str1 != 0) && (*str2 != 0)); return (*str1 - *str2); } /** * xmlStrchr: * @str: the xmlChar * array * @val: the xmlChar to search * * a strchr for xmlChar's * * Returns the xmlChar * for the first occurence or NULL. */ const xmlChar * xmlStrchr(const xmlChar *str, xmlChar val) { if (str == NULL) return(NULL); while (*str != 0) { if (*str == val) return((xmlChar *) str); str++; } return(NULL); } /** * xmlStrstr: * @str: the xmlChar * array (haystack) * @val: the xmlChar to search (needle) * * a strstr for xmlChar's * * Returns the xmlChar * for the first occurence or NULL. */ const xmlChar * xmlStrstr(const xmlChar *str, xmlChar *val) { int n; if (str == NULL) return(NULL); if (val == NULL) return(NULL); n = xmlStrlen(val); if (n == 0) return(str); while (*str != 0) { if (*str == *val) { if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str); } str++; } return(NULL); } /** * xmlStrsub: * @str: the xmlChar * array (haystack) * @start: the index of the first char (zero based) * @len: the length of the substring * * Extract a substring of a given string * * Returns the xmlChar * for the first occurence or NULL. 
*/ xmlChar * xmlStrsub(const xmlChar *str, int start, int len) { int i; if (str == NULL) return(NULL); if (start < 0) return(NULL); if (len < 0) return(NULL); for (i = 0;i < start;i++) { if (*str == 0) return(NULL); str++; } if (*str == 0) return(NULL); return(xmlStrndup(str, len)); } /** * xmlStrlen: * @str: the xmlChar * array * * length of a xmlChar's string * * Returns the number of xmlChar contained in the ARRAY. */ int xmlStrlen(const xmlChar *str) { int len = 0; if (str == NULL) return(0); while (*str != 0) { str++; len++; } return(len); } /** * xmlStrncat: * @cur: the original xmlChar * array * @add: the xmlChar * array added * @len: the length of @add * * a strncat for array of xmlChar's * * Returns a new xmlChar * containing the concatenated string. */ xmlChar * xmlStrncat(xmlChar *cur, const xmlChar *add, int len) { int size; xmlChar *ret; if ((add == NULL) || (len == 0)) return(cur); if (cur == NULL) return(xmlStrndup(add, len)); size = xmlStrlen(cur); ret = xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar)); if (ret == NULL) { fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n", (size + len + 1) * (long)sizeof(xmlChar)); return(cur); } memcpy(&ret[size], add, len * sizeof(xmlChar)); ret[size + len] = 0; return(ret); } /** * xmlStrcat: * @cur: the original xmlChar * array * @add: the xmlChar * array added * * a strcat for array of xmlChar's. Since they are supposed to be * encoded in UTF-8 or an encoding with 8bit based chars, we assume * a termination mark of '0'. * * Returns a new xmlChar * containing the concatenated string. */ xmlChar * xmlStrcat(xmlChar *cur, const xmlChar *add) { const xmlChar *p = add; if (add == NULL) return(cur); if (cur == NULL) return(xmlStrdup(add)); while (*p != 0) p++; return(xmlStrncat(cur, add, p - add)); } /************************************************************************ * * * Commodity functions, cleanup needed ? 
* * * ************************************************************************/ /** * areBlanks: * @ctxt: an XML parser context * @str: a xmlChar * * @len: the size of @str * * Is this a sequence of blank chars that one can ignore ? * * Returns 1 if ignorable 0 otherwise. */ static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) { int i, ret; xmlNodePtr lastChild; /* * Check for xml:space value. */ if (*(ctxt->space) == 1) return(0); /* * Check that the string is made of blanks */ for (i = 0;i < len;i++) if (!(IS_BLANK(str[i]))) return(0); /* * Look if the element is mixed content in the Dtd if available */ if (ctxt->myDoc != NULL) { ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); if (ret == 0) return(1); if (ret == 1) return(0); } /* * Otherwise, heuristic :-\ */ if (ctxt->keepBlanks) return(0); if (RAW != '<') return(0); if (ctxt->node == NULL) return(0); if ((ctxt->node->children == NULL) && (RAW == '<') && (NXT(1) == '/')) return(0); lastChild = xmlGetLastChild(ctxt->node); if (lastChild == NULL) { if (ctxt->node->content != NULL) return(0); } else if (xmlNodeIsText(lastChild)) return(0); else if ((ctxt->node->children != NULL) && (xmlNodeIsText(ctxt->node->children))) return(0); return(1); } /** * xmlHandleEntity: * @ctxt: an XML parser context * @entity: an XML entity pointer. * * Default handling of defined entities, when should we define a new input * stream ? When do we just handle that as a set of chars ? * * OBSOLETE: to be removed at some point. */ void xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { int len; xmlParserInputPtr input; if (entity->content == NULL) { ctxt->errNo = XML_ERR_INTERNAL_ERROR; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n", entity->name); ctxt->wellFormed = 0; ctxt->disableSAX = 1; return; } len = xmlStrlen(entity->content); if (len <= 2) goto handle_as_char; /* * Redefine its content as an input stream. 
*/ input = xmlNewEntityInputStream(ctxt, entity); xmlPushInput(ctxt, input); return; handle_as_char: /* * Just handle the content as a set of chars. */ if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && (ctxt->sax->characters != NULL)) ctxt->sax->characters(ctxt->userData, entity->content, len); } /* * Forward definition for recusive behaviour. */ void xmlParsePEReference(xmlParserCtxtPtr ctxt); void xmlParseReference(xmlParserCtxtPtr ctxt); /************************************************************************ * * * Extra stuff for namespace support * * Relates to http://www.w3.org/TR/WD-xml-names * * * ************************************************************************/ /** * xmlNamespaceParseNCName: * @ctxt: an XML parser context * * parse an XML namespace name. * * [NS 3] NCName ::= (Letter | '_') (NCNameChar)* * * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | * CombiningChar | Extender * * Returns the namespace name or NULL */ xmlChar * xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) { xmlChar buf[XML_MAX_NAMELEN + 5]; int len = 0, l; int cur = CUR_CHAR(l); /* load first the value of the char !!! */ if (!IS_LETTER(cur) && (cur != '_')) return(NULL); while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || (cur == '.') || (cur == '-') || (cur == '_') || (IS_COMBINING(cur)) || (IS_EXTENDER(cur))) { COPY_BUF(l,buf,len,cur); NEXTL(l); cur = CUR_CHAR(l); if (len >= XML_MAX_NAMELEN) { fprintf(stderr, "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n"); while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || (cur == '.') || (cur == '-') || (cur == '_') || (IS_COMBINING(cur)) || (IS_EXTENDER(cur))) { NEXTL(l); cur = CUR_CHAR(l); } break; } } return(xmlStrndup(buf, len)); } /** * xmlNamespaceParseQName: * @ctxt: an XML parser context * @prefix: a xmlChar ** * * parse an XML qualified name * * [NS 5] QName ::= (Prefix ':')? 
LocalPart * * [NS 6] Prefix ::= NCName * * [NS 7] LocalPart ::= NCName * * Returns the local part, and prefix is updated * to get the Prefix if any. */ xmlChar * xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) { xmlChar *ret = NULL; *prefix = NULL; ret = xmlNamespaceParseNCName(ctxt); if (RAW == ':') { *prefix = ret; NEXT; ret = xmlNamespaceParseNCName(ctxt); } return(ret); } /** * xmlSplitQName: * @ctxt: an XML parser context * @name: an XML parser context * @prefix: a xmlChar ** * * parse an XML qualified name string * * [NS 5] QName ::= (Prefix ':')? LocalPart * * [NS 6] Prefix ::= NCName * * [NS 7] LocalPart ::= NCName * * Returns the local part, and prefix is updated * to get the Prefix if any. */ xmlChar * xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { xmlChar buf[XML_MAX_NAMELEN + 5]; int len = 0; xmlChar *ret = NULL; const xmlChar *cur = name; int c,l; *prefix = NULL; /* xml: prefix is not really a namespace */ if ((cur[0] == 'x') && (cur[1] == 'm') && (cur[2] == 'l') && (cur[3] == ':')) return(xmlStrdup(name)); /* nasty but valid */ if (cur[0] == ':') return(xmlStrdup(name)); c = CUR_SCHAR(cur, l); if (!IS_LETTER(c) && (c != '_')) return(NULL); while ((IS_LETTER(c)) || (IS_DIGIT(c)) || (c == '.') || (c == '-') || (c == '_') || (IS_COMBINING(c)) || (IS_EXTENDER(c))) { COPY_BUF(l,buf,len,c); cur += l; c = CUR_SCHAR(cur, l); } ret = xmlStrndup(buf, len); if (c == ':') { cur += l; c = CUR_SCHAR(cur, l); if (!IS_LETTER(c) && (c != '_')) return(ret); *prefix = ret; len = 0; while ((IS_LETTER(c)) || (IS_DIGIT(c)) || (c == '.') || (c == '-') || (c == '_') || (IS_COMBINING(c)) || (IS_EXTENDER(c))) { COPY_BUF(l,buf,len,c); cur += l; c = CUR_SCHAR(cur, l); } ret = xmlStrndup(buf, len); } return(ret); } /** * xmlNamespaceParseNSDef: * @ctxt: an XML parser context * * parse a namespace prefix declaration * * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral * * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)? 
*
 * Returns the namespace name
 */
xmlChar *
xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
    xmlChar *name = NULL;

    if ((RAW == 'x') && (NXT(1) == 'm') &&
        (NXT(2) == 'l') && (NXT(3) == 'n') &&
        (NXT(4) == 's')) {
        SKIP(5);
        /* a bare "xmlns" carries no name: NULL is returned */
        if (RAW == ':') {
            NEXT;
            name = xmlNamespaceParseNCName(ctxt);
        }
    }
    return(name);
}

/**
 * xmlParseQuotedString:
 * @ctxt:  an XML parser context
 *
 * [OLD] Parse and return a string between quotes or doublequotes
 * To be removed at next drop of binary compatibility
 *
 * The returned buffer is always 0 terminated (it previously was not,
 * although it is printed with a string format on error). If the input
 * does not start with a quote an empty string is returned.
 *
 * Returns the string parser or NULL.
 */
xmlChar *
xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
    xmlChar *tmp;
    int len = 0,l;
    int size = XML_PARSER_BUFFER_SIZE;
    int c;

    buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
    if (buf == NULL) {
        fprintf(stderr, "malloc of %d byte failed\n", size);
        return(NULL);
    }
    buf[0] = 0;
    if (RAW == '"') {
        NEXT;
        c = CUR_CHAR(l);
        while (IS_CHAR(c) && (c != '"')) {
            if (len + 5 >= size) {
                size *= 2;
                tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
                if (tmp == NULL) {
                    fprintf(stderr, "realloc of %d byte failed\n", size);
                    /* do not leak the block realloc left untouched */
                    xmlFree(buf);
                    return(NULL);
                }
                buf = tmp;
            }
            COPY_BUF(l,buf,len,c);
            NEXTL(l);
            c = CUR_CHAR(l);
        }
        buf[len] = 0;
        if (c != '"') {
            ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
                                 "String not closed \"%.50s\"\n", buf);
            ctxt->wellFormed = 0;
            ctxt->disableSAX = 1;
        } else {
            NEXT;
        }
    } else if (RAW == '\''){
        NEXT;
        c = CUR;
        while (IS_CHAR(c) && (c != '\'')) {
            if (len + 1 >= size) {
                size *= 2;
                tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
                if (tmp == NULL) {
                    fprintf(stderr, "realloc of %d byte failed\n", size);
                    xmlFree(buf);
                    return(NULL);
                }
                buf = tmp;
            }
            buf[len++] = c;
            NEXT;
            c = CUR;
        }
        buf[len] = 0;
        if (RAW != '\'') {
            ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
                                 "String not closed \"%.50s\"\n", buf);
            ctxt->wellFormed = 0;
            ctxt->disableSAX = 1;
        } else {
            NEXT;
        }
    }
    return(buf);
}

/**
 * xmlParseNamespace:
 * @ctxt:  an XML parser context
 *
 * [OLD] xmlParseNamespace: parse specific PI '')) {
/* * We can have "ns" or "prefix" attributes * Old encoding as 'href' or 'AS' attributes is still supported */ if ((RAW == 'n') && (NXT(1) == 's')) { garbage = 0; SKIP(2); SKIP_BLANKS; if (RAW != '=') continue; NEXT; SKIP_BLANKS; href = xmlParseQuotedString(ctxt); SKIP_BLANKS; } else if ((RAW == 'h') && (NXT(1) == 'r') && (NXT(2) == 'e') && (NXT(3) == 'f')) { garbage = 0; SKIP(4); SKIP_BLANKS; if (RAW != '=') continue; NEXT; SKIP_BLANKS; href = xmlParseQuotedString(ctxt); SKIP_BLANKS; } else if ((RAW == 'p') && (NXT(1) == 'r') && (NXT(2) == 'e') && (NXT(3) == 'f') && (NXT(4) == 'i') && (NXT(5) == 'x')) { garbage = 0; SKIP(6); SKIP_BLANKS; if (RAW != '=') continue; NEXT; SKIP_BLANKS; prefix = xmlParseQuotedString(ctxt); SKIP_BLANKS; } else if ((RAW == 'A') && (NXT(1) == 'S')) { garbage = 0; SKIP(2); SKIP_BLANKS; if (RAW != '=') continue; NEXT; SKIP_BLANKS; prefix = xmlParseQuotedString(ctxt); SKIP_BLANKS; } else if ((RAW == '?') && (NXT(1) == '>')) { garbage = 0; NEXT; } else { /* * Found garbage when parsing the namespace */ if (!garbage) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlParseNamespace found garbage\n"); } ctxt->errNo = XML_ERR_NS_DECL_ERROR; ctxt->wellFormed = 0; ctxt->disableSAX = 1; NEXT; } } MOVETO_ENDTAG(CUR_PTR); NEXT; /* * Register the DTD. if (href != NULL) if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL)) ctxt->sax->globalNamespace(ctxt->userData, href, prefix); */ if (prefix != NULL) xmlFree(prefix); if (href != NULL) xmlFree(href); } /************************************************************************ * * * The parser itself * * Relates to http://www.w3.org/TR/REC-xml * * * ************************************************************************/ /** * xmlScanName: * @ctxt: an XML parser context * * Trickery: parse an XML name but without consuming the input flow * Needed for rollback cases. * * [4] NameChar ::= Letter | Digit | '.' 
| '-' | '_' | ':' | * CombiningChar | Extender * * [5] Name ::= (Letter | '_' | ':') (NameChar)* * * [6] Names ::= Name (S Name)* * * Returns the Name parsed or NULL */ xmlChar * xmlScanName(xmlParserCtxtPtr ctxt) { xmlChar buf[XML_MAX_NAMELEN]; int len = 0; GROW; if (!IS_LETTER(RAW) && (RAW != '_') && (RAW != ':')) { return(NULL); } while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || (NXT(len) == '.') || (NXT(len) == '-') || (NXT(len) == '_') || (NXT(len) == ':') || (IS_COMBINING(NXT(len))) || (IS_EXTENDER(NXT(len)))) { buf[len] = NXT(len); len++; if (len >= XML_MAX_NAMELEN) { fprintf(stderr, "xmlScanName: reached XML_MAX_NAMELEN limit\n"); while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || (NXT(len) == '.') || (NXT(len) == '-') || (NXT(len) == '_') || (NXT(len) == ':') || (IS_COMBINING(NXT(len))) || (IS_EXTENDER(NXT(len)))) len++; break; } } return(xmlStrndup(buf, len)); } /** * xmlParseName: * @ctxt: an XML parser context * * parse an XML name. * * [4] NameChar ::= Letter | Digit | '.' 
| '-' | '_' | ':' |
 * CombiningChar | Extender
 *
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
 *
 * [6] Names ::= Name (S Name)*
 *
 * Returns the Name parsed or NULL
 */
xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt) {
    /* 5 items of slack beyond the limit checked after each copy below */
    xmlChar buf[XML_MAX_NAMELEN + 5];
    int len = 0, l;
    int c;

    GROW;
    c = CUR_CHAR(l);
    /* reject at once the chars that most often follow or precede a name */
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
        (!IS_LETTER(c) && (c != '_') &&
         (c != ':'))) {
        return(NULL);
    }

    /* consume the input one char at a time, storing it in buf */
    while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
           ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
            (c == '.') || (c == '-') ||
            (c == '_') || (c == ':') ||
            (IS_COMBINING(c)) ||
            (IS_EXTENDER(c)))) {
        COPY_BUF(l,buf,len,c);
        NEXTL(l);
        c = CUR_CHAR(l);
        if (len >= XML_MAX_NAMELEN) {
            fprintf(stderr,
               "xmlParseName: reached XML_MAX_NAMELEN limit\n");
            /* the rest of the name is consumed but not stored */
            while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
                   (c == '.') || (c == '-') ||
                   (c == '_') || (c == ':') ||
                   (IS_COMBINING(c)) ||
                   (IS_EXTENDER(c))) {
                NEXTL(l);
                c = CUR_CHAR(l);
            }
            break;
        }
    }
    /* the result is a heap copy of what was stored */
    return(xmlStrndup(buf, len));
}

/**
 * xmlParseStringName:
 * @ctxt:  an XML parser context
 * @str:  a pointer to an index in the string
 *
 * parse an XML name.
 *
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
 * CombiningChar | Extender
 *
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
 *
 * [6] Names ::= Name (S Name)*
 *
 * Returns the Name parsed or NULL. The str pointer
 * is updated to the current location in the string.
*/
xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
    xmlChar buf[XML_MAX_NAMELEN + 5];
    const xmlChar *cur = *str;
    int len = 0, l;
    int c;

    c = CUR_SCHAR(cur, l);
    if (!IS_LETTER(c) && (c != '_') &&
        (c != ':')) {
        /* not a name: *str is left untouched */
        return(NULL);
    }

    while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
           (c == '.') || (c == '-') ||
           (c == '_') || (c == ':') ||
           (IS_COMBINING(c)) ||
           (IS_EXTENDER(c))) {
        COPY_BUF(l,buf,len,c);
        cur += l;
        c = CUR_SCHAR(cur, l);
        if (len >= XML_MAX_NAMELEN) {
            fprintf(stderr,
               "xmlParseStringName: reached XML_MAX_NAMELEN limit\n");
            /* step over the rest of the name without storing it */
            while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
                   (c == '.') || (c == '-') ||
                   (c == '_') || (c == ':') ||
                   (IS_COMBINING(c)) ||
                   (IS_EXTENDER(c))) {
                cur += l;
                c = CUR_SCHAR(cur, l);
            }
            break;
        }
    }
    *str = cur;
    return(xmlStrndup(buf, len));
}

/**
 * xmlParseNmtoken:
 * @ctxt:  an XML parser context
 *
 * parse an XML Nmtoken.
 *
 * [7] Nmtoken ::= (NameChar)+
 *
 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
 *
 * Returns the Nmtoken parsed or NULL
 */
xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
    /*
     * Same slack as the other name parsers: the limit is only checked
     * after COPY_BUF, which may store a multi-byte char starting at
     * index XML_MAX_NAMELEN - 1.
     */
    xmlChar buf[XML_MAX_NAMELEN + 5];
    int len = 0;
    int c,l;

    GROW;
    c = CUR_CHAR(l);
    while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
           (c == '.') || (c == '-') ||
           (c == '_') || (c == ':') ||
           (IS_COMBINING(c)) ||
           (IS_EXTENDER(c))) {
        COPY_BUF(l,buf,len,c);
        NEXTL(l);
        c = CUR_CHAR(l);
        if (len >= XML_MAX_NAMELEN) {
            fprintf(stderr,
               "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
            /* consume the rest of the token without storing it */
            while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
                   (c == '.') || (c == '-') ||
                   (c == '_') || (c == ':') ||
                   (IS_COMBINING(c)) ||
                   (IS_EXTENDER(c))) {
                NEXTL(l);
                c = CUR_CHAR(l);
            }
            break;
        }
    }
    if (len == 0)
        return(NULL);
    return(xmlStrndup(buf, len));
}

/**
 * xmlParseEntityValue:
 * @ctxt:  an XML parser context
 * @orig:  if non-NULL store a copy of the original entity value
 *
 * parse a value for ENTITY decl.
* * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | * "'" ([^%&'] | PEReference | Reference)* "'" * * Returns the EntityValue parsed with reference substitued or NULL */ xmlChar * xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { xmlChar *buf = NULL; int len = 0; int size = XML_PARSER_BUFFER_SIZE; int c, l; xmlChar stop; xmlChar *ret = NULL; const xmlChar *cur = NULL; xmlParserInputPtr input; if (RAW == '"') stop = '"'; else if (RAW == '\'') stop = '\''; else { ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; return(NULL); } buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); if (buf == NULL) { fprintf(stderr, "malloc of %d byte failed\n", size); return(NULL); } /* * The content of the entity definition is copied in a buffer. */ ctxt->instate = XML_PARSER_ENTITY_VALUE; input = ctxt->input; GROW; NEXT; c = CUR_CHAR(l); /* * NOTE: 4.4.5 Included in Literal * When a parameter entity reference appears in a literal entity * value, ... a single or double quote character in the replacement * text is always treated as a normal data character and will not * terminate the literal. * In practice it means we stop the loop only when back at parsing * the initial entity and the quote is found */ while (IS_CHAR(c) && ((c != stop) || (ctxt->input != input))) { if (len + 5 >= size) { size *= 2; buf = xmlRealloc(buf, size * sizeof(xmlChar)); if (buf == NULL) { fprintf(stderr, "realloc of %d byte failed\n", size); return(NULL); } } COPY_BUF(l,buf,len,c); NEXTL(l); /* * Pop-up of finished entities. */ while ((RAW == 0) && (ctxt->inputNr > 1)) xmlPopInput(ctxt); c = CUR_CHAR(l); if (c == 0) { GROW; c = CUR_CHAR(l); } } buf[len] = 0; /* * Raise problem w.r.t. '&' and '%' being used in non-entities * reference constructs. 
Note Charref will be handled in * xmlStringDecodeEntities() */ cur = buf; while (*cur != 0) { if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { xmlChar *name; xmlChar tmp = *cur; cur++; name = xmlParseStringName(ctxt, &cur); if ((name == NULL) || (*cur != ';')) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "EntityValue: '%c' forbidden except for entities references\n", tmp); ctxt->wellFormed = 0; ctxt->disableSAX = 1; ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR; } if ((ctxt->inSubset == 1) && (tmp == '%')) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "EntityValue: PEReferences forbidden in internal subset\n", tmp); ctxt->wellFormed = 0; ctxt->disableSAX = 1; ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL; } if (name != NULL) xmlFree(name); } cur++; } /* * Then PEReference entities are substituted. */ if (c != stop) { ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; xmlFree(buf); } else { NEXT; /* * NOTE: 4.4.7 Bypassed * When a general entity reference appears in the EntityValue in * an entity declaration, it is bypassed and left as is. * so XML_SUBSTITUTE_REF is not set here. 
*/ ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 0, 0, 0); if (orig != NULL) *orig = buf; else xmlFree(buf); } return(ret); } /** * xmlParseAttValue: * @ctxt: an XML parser context * * parse a value for an attribute * Note: the parser won't do substitution of entities here, this * will be handled later in xmlStringGetNodeList * * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | * "'" ([^<&'] | Reference)* "'" * * 3.3.3 Attribute-Value Normalization: * Before the value of an attribute is passed to the application or * checked for validity, the XML processor must normalize it as follows: * - a character reference is processed by appending the referenced * character to the attribute value * - an entity reference is processed by recursively processing the * replacement text of the entity * - a whitespace character (#x20, #xD, #xA, #x9) is processed by * appending #x20 to the normalized value, except that only a single * #x20 is appended for a "#xD#xA" sequence that is part of an external * parsed entity or the literal entity value of an internal parsed entity * - other characters are processed by appending them to the normalized value * If the declared value is not CDATA, then the XML processor must further * process the normalized attribute value by discarding any leading and * trailing space (#x20) characters, and by replacing sequences of space * (#x20) characters by a single space (#x20) character. * All attributes for which no declaration has been read should be treated * by a non-validating parser as if declared CDATA. * * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 
*/ xmlChar * xmlParseAttValue(xmlParserCtxtPtr ctxt) { xmlChar limit = 0; xmlChar *buf = NULL; int len = 0; int buf_size = 0; int c, l; xmlChar *current = NULL; xmlEntityPtr ent; SHRINK; if (NXT(0) == '"') { ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; limit = '"'; NEXT; } else if (NXT(0) == '\'') { limit = '\''; ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; NEXT; } else { ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; return(NULL); } /* * allocate a translation buffer. */ buf_size = XML_PARSER_BUFFER_SIZE; buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar)); if (buf == NULL) { perror("xmlParseAttValue: malloc failed"); return(NULL); } /* * Ok loop until we reach one of the ending char or a size limit. */ c = CUR_CHAR(l); while (((NXT(0) != limit) && (c != '<')) || (ctxt->token != 0)) { if (c == 0) break; if ((c == '&') && (NXT(1) == '#')) { int val = xmlParseCharRef(ctxt); COPY_BUF(l,buf,len,val); NEXTL(l); } else if (c == '&') { ent = xmlParseEntityRef(ctxt); if ((ent != NULL) && (ctxt->replaceEntities != 0)) { xmlChar *rep; if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { rep = xmlStringDecodeEntities(ctxt, ent->content, XML_SUBSTITUTE_REF, 0, 0, 0); if (rep != NULL) { current = rep; while (*current != 0) { buf[len++] = *current++; if (len > buf_size - 10) { growBuffer(buf); } } xmlFree(rep); } } else { if (ent->content != NULL) buf[len++] = ent->content[0]; } } else if (ent != NULL) { int i = xmlStrlen(ent->name); const xmlChar *cur = ent->name; /* * This may look absurd but is needed to detect * entities problems */ if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { xmlChar *rep; rep = xmlStringDecodeEntities(ctxt, ent->content, XML_SUBSTITUTE_REF, 0, 0, 0); if (rep != NULL) xmlFree(rep); } /* * Just output the reference */ buf[len++] = '&'; if (len > buf_size - i - 10) { growBuffer(buf); } for 
(;i > 0;i--) buf[len++] = *cur++; buf[len++] = ';'; } } else { if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { COPY_BUF(l,buf,len,0x20); if (len > buf_size - 10) { growBuffer(buf); } } else { COPY_BUF(l,buf,len,c); if (len > buf_size - 10) { growBuffer(buf); } } NEXTL(l); } GROW; c = CUR_CHAR(l); } buf[len++] = 0; if (RAW == '<') { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Unescaped '<' not allowed in attributes values\n"); ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; ctxt->wellFormed = 0; ctxt->disableSAX = 1; } else if (RAW != limit) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n"); ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED; ctxt->wellFormed = 0; ctxt->disableSAX = 1; } else NEXT; return(buf); } /** * xmlParseSystemLiteral: * @ctxt: an XML parser context * * parse an XML Literal * * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") * * Returns the SystemLiteral parsed or NULL */ xmlChar * xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { xmlChar *buf = NULL; int len = 0; int size = XML_PARSER_BUFFER_SIZE; int cur, l; xmlChar stop; int state = ctxt->instate; SHRINK; if (RAW == '"') { NEXT; stop = '"'; } else if (RAW == '\'') { NEXT; stop = '\''; } else { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n"); ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED; ctxt->wellFormed = 0; ctxt->disableSAX = 1; return(NULL); } buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); if (buf == NULL) { fprintf(stderr, "malloc of %d byte failed\n", size); return(NULL); } ctxt->instate = XML_PARSER_SYSTEM_LITERAL; cur = CUR_CHAR(l); while ((IS_CHAR(cur)) && (cur != stop)) { if (len + 5 >= size) { size *= 2; buf = xmlRealloc(buf, size * sizeof(xmlChar)); if (buf == NULL) { fprintf(stderr, "realloc of %d byte failed\n", size); ctxt->instate = state; return(NULL); } } COPY_BUF(l,buf,len,cur); 
NEXTL(l); cur = CUR_CHAR(l); if (cur == 0) { GROW; SHRINK; cur = CUR_CHAR(l); } } buf[len] = 0; ctxt->instate = state; if (!IS_CHAR(cur)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n"); ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED; ctxt->wellFormed = 0; ctxt->disableSAX = 1; } else { NEXT; } return(buf); } /** * xmlParsePubidLiteral: * @ctxt: an XML parser context * * parse an XML public literal * * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" * * Returns the PubidLiteral parsed or NULL. */ xmlChar * xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { xmlChar *buf = NULL; int len = 0; int size = XML_PARSER_BUFFER_SIZE; xmlChar cur; xmlChar stop; SHRINK; if (RAW == '"') { NEXT; stop = '"'; } else if (RAW == '\'') { NEXT; stop = '\''; } else { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n"); ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED; ctxt->wellFormed = 0; ctxt->disableSAX = 1; return(NULL); } buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); if (buf == NULL) { fprintf(stderr, "malloc of %d byte failed\n", size); return(NULL); } cur = CUR; while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { if (len + 1 >= size) { size *= 2; buf = xmlRealloc(buf, size * sizeof(xmlChar)); if (buf == NULL) { fprintf(stderr, "realloc of %d byte failed\n", size); return(NULL); } } buf[len++] = cur; NEXT; cur = CUR; if (cur == 0) { GROW; SHRINK; cur = CUR; } } buf[len] = 0; if (cur != stop) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n"); ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED; ctxt->wellFormed = 0; ctxt->disableSAX = 1; } else { NEXT; } return(buf); } /** * xmlParseCharData: * @ctxt: an XML parser context * @cdata: int indicating whether we are within a CDATA section * * parse a CharData section. 
* if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string ">",
 * and must, for compatibility, be escaped using ">" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 */
void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    /* chars are gathered here and flushed to SAX whenever the buffer fills */
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
    int nbchar = 0;
    int cur, l;

    SHRINK;
    cur = CUR_CHAR(l);
    /*
     * Stop on '<' or '&', unless that char is the one currently held
     * in ctxt->token, or on anything that is not a valid Char.
     */
    while (((cur != '<') || (ctxt->token == '<')) &&
           ((cur != '&') || (ctxt->token == '&')) &&
           (IS_CHAR(cur))) {
        if ((cur == ']') && (NXT(1) == ']') &&
            (NXT(2) == '>')) {
            /* inside CDATA this ends the section, elsewhere it is an error */
            if (cdata) break;
            else {
                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                    ctxt->sax->error(ctxt->userData,
                       "Sequence ']]>' not allowed in content\n");
                ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
                /* Should this be relaxed ??? I see a "must" here */
                ctxt->wellFormed = 0;
                ctxt->disableSAX = 1;
            }
        }
        COPY_BUF(l,buf,nbchar,cur);
        if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
            /*
             * Buffer full: hand the segment to the SAX layer, as
             * ignorable whitespace or as characters depending on
             * areBlanks(), then start filling the buffer again.
             */
            if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
                if (areBlanks(ctxt, buf, nbchar)) {
                    if (ctxt->sax->ignorableWhitespace != NULL)
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       buf, nbchar);
                } else {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData, buf, nbchar);
                }
            }
            nbchar = 0;
        }
        NEXTL(l);
        cur = CUR_CHAR(l);
    }
    if (nbchar != 0) {
        /*
         * Flush what is left in the buffer the same way.
         */
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
            if (areBlanks(ctxt, buf, nbchar)) {
                if (ctxt->sax->ignorableWhitespace != NULL)
                    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
            } else {
                if (ctxt->sax->characters != NULL)
                    ctxt->sax->characters(ctxt->userData, buf, nbchar);
            }
        }
    }
}

/**
 * xmlParseExternalID:
 * @ctxt:  an XML parser context
 * @publicID:  a xmlChar** receiving PubidLiteral
 * @strict: indicate whether we should restrict parsing to only
 *          production [75], see NOTE below
 *
 * Parse an External ID or a Public ID
 *
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
 *       'PUBLIC' S PubidLiteral S SystemLiteral
 *
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
 *
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
 *
 * @publicID is only written in the 'PUBLIC' case; it is left untouched
 * for 'SYSTEM' or when neither keyword is found.
 *
 * Returns the SystemLiteral; in the 'PUBLIC' case publicID receives the
 *                PubidLiteral. If strict is off it is possible to return
 *                NULL and have publicID set.
 */
xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
    xmlChar *URI = NULL;

    SHRINK;
    if ((RAW == 'S') && (NXT(1) == 'Y') &&
         (NXT(2) == 'S') && (NXT(3) == 'T') &&
         (NXT(4) == 'E') && (NXT(5) == 'M')) {
        SKIP(6);
        /* a missing blank is reported but parsing goes on */
        if (!IS_BLANK(CUR)) {
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
                    "Space required after 'SYSTEM'\n");
            ctxt->errNo = XML_ERR_SPACE_REQUIRED;
            ctxt->wellFormed = 0;
            ctxt->disableSAX = 1;
        }
        SKIP_BLANKS;
        URI = xmlParseSystemLiteral(ctxt);
        if (URI == NULL) {
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
                  "xmlParseExternalID: SYSTEM, no URI\n");
            ctxt->errNo = XML_ERR_URI_REQUIRED;
            ctxt->wellFormed = 0;
            ctxt->disableSAX = 1;
        }
    } else if ((RAW == 'P') && (NXT(1) == 'U') &&
               (NXT(2) == 'B') && (NXT(3) == 'L') &&
               (NXT(4) == 'I') && (NXT(5) == 'C')) {
        SKIP(6);
        if (!IS_BLANK(CUR)) {
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
                    "Space required after 'PUBLIC'\n");
            ctxt->errNo = XML_ERR_SPACE_REQUIRED;
            ctxt->wellFormed = 0;
            ctxt->disableSAX = 1;
        }
        SKIP_BLANKS;
        *publicID = xmlParsePubidLiteral(ctxt);
        if (*publicID == NULL) {
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
                  "xmlParseExternalID: PUBLIC, no Public Identifier\n");
            ctxt->errNo = XML_ERR_PUBID_REQUIRED;
            ctxt->wellFormed = 0;
            ctxt->disableSAX = 1;
        }
        if (strict) {
            /*
             * We don't handle [83] so "S SystemLiteral" is required.
             */
            if (!IS_BLANK(CUR)) {
                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                    ctxt->sax->error(ctxt->userData,
                        "Space required after the Public Identifier\n");
                ctxt->errNo = XML_ERR_SPACE_REQUIRED;
                ctxt->wellFormed = 0;
                ctxt->disableSAX = 1;
            }
        } else {
            /*
             * We handle [83] so we return immediately, if
             * "S SystemLiteral" is not detected. From a purely parsing
             * point of view that's a nice mess.
             */
            const xmlChar *ptr;
            GROW;

            /* peek past the blanks without consuming any input */
            ptr = CUR_PTR;
            if (!IS_BLANK(*ptr)) return(NULL);

            while (IS_BLANK(*ptr)) ptr++;
            if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
        }
        SKIP_BLANKS;
        URI = xmlParseSystemLiteral(ctxt);
        if (URI == NULL) {
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
                   "xmlParseExternalID: PUBLIC, no URI\n");
            ctxt->errNo = XML_ERR_URI_REQUIRED;
            ctxt->wellFormed = 0;
            ctxt->disableSAX = 1;
        }
    }
    return(URI);
}

/**
 * xmlParseComment:
 * @ctxt:  an XML parser context
 *
 * Skip an XML (SGML) comment
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
 *  must not occur within comments. "
 *
 * [15] Comment ::= ''
 */
void
xmlParseComment(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
    int len;
    int size = XML_PARSER_BUFFER_SIZE;
    int q, ql;
    int r, rl;
    int cur, l;
    xmlParserInputState state;
    xmlParserInputPtr input = ctxt->input;

    /*
     * Check that there is a comment right here.
*/ if ((RAW != '<') || (NXT(1) != '!') || (NXT(2) != '-') || (NXT(3) != '-')) return; state = ctxt->instate; ctxt->instate = XML_PARSER_COMMENT; SHRINK; SKIP(4); buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); if (buf == NULL) { fprintf(stderr, "malloc of %d byte failed\n", size); ctxt->instate = state; return; } q = CUR_CHAR(ql); NEXTL(ql); r = CUR_CHAR(rl); NEXTL(rl); cur = CUR_CHAR(l); len = 0; while (IS_CHAR(cur) && ((cur != '>') || (r != '-') || (q != '-'))) { if ((r == '-') && (q == '-') && (len > 1)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Comment must not contain '--' (double-hyphen)`\n"); ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT; ctxt->wellFormed = 0; ctxt->disableSAX = 1; } if (len + 5 >= size) { size *= 2; buf = xmlRealloc(buf, size * sizeof(xmlChar)); if (buf == NULL) { fprintf(stderr, "realloc of %d byte failed\n", size); ctxt->instate = state; return; } } COPY_BUF(ql,buf,len,q); q = r; ql = rl; r = cur; rl = l; NEXTL(l); cur = CUR_CHAR(l); if (cur == 0) { SHRINK; GROW; cur = CUR_CHAR(l); } } buf[len] = 0; if (!IS_CHAR(cur)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Comment not terminated \n