From 7704fb1d9fa131b0077db22e470f1187645dc6c4 Mon Sep 17 00:00:00 2001 From: Daniel Veillard Date: Fri, 3 Jan 2003 16:19:51 +0000 Subject: [PATCH] added the --stream flag to use the TextReader API small performance tweak * xmllint.c: added the --stream flag to use the TextReader API * xmlreader.c: small performance tweak Daniel --- ChangeLog | 5 +++ xmllint.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++--- xmlreader.c | 11 +++++-- 3 files changed, 101 insertions(+), 7 deletions(-) diff --git a/ChangeLog b/ChangeLog index 81404ed2..1fda4425 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +Fri Jan 3 17:18:32 CET 2003 Daniel Veillard + + * xmllint.c: added the --stream flag to use the TextReader API + * xmlreader.c: small performance tweak + Fri Jan 3 13:50:55 CET 2003 Daniel Veillard * xmlreader.c python/tests/reader2py: okay the DTD validation diff --git a/xmllint.c b/xmllint.c index 5c166609..41dac2fd 100644 --- a/xmllint.c +++ b/xmllint.c @@ -81,6 +81,7 @@ #include #endif #include +#include <libxml/xmlreader.h> #ifdef LIBXML_DEBUG_ENABLED static int debug = 0; @@ -124,6 +125,7 @@ static int dropdtd = 0; static int catalogs = 0; static int nocatalogs = 0; #endif +static int stream = 0; static const char *output = NULL; @@ -563,7 +565,75 @@ static void myClose(FILE *f) { /************************************************************************ * * - * Test processing * + * Stream Test processing * + * * + ************************************************************************/ +static int count = 0; +static int elem, attrs; + +static void processNode(xmlTextReaderPtr reader) { + if (debug) { + xmlChar *name, *value; + + name = xmlTextReaderName(reader); + if (name == NULL) + name = xmlStrdup(BAD_CAST "--"); + value = xmlTextReaderValue(reader); + + printf("%d %d %d %s", + xmlTextReaderDepth(reader), + xmlTextReaderNodeType(reader), + xmlTextReaderIsEmptyElement(reader), + name); + xmlFree(name); + if (value == NULL) + printf("\n"); + else { + printf(" %s\n", value); 
xmlFree(value); + } + } +} + +static void streamFile(char *filename) { + xmlTextReaderPtr reader; + int ret; + + if (count) { + elem = 0; + attrs = 0; + } + + reader = xmlNewTextReaderFilename(filename); + if (reader != NULL) { + if (valid) + xmlTextReaderSetParserProp(reader, XML_PARSER_VALIDATE, 1); + + /* + * Process all nodes in sequence + */ + ret = xmlTextReaderRead(reader); + while (ret == 1) { + if (debug) + processNode(reader); + ret = xmlTextReaderRead(reader); + } + + /* + * Done, cleanup and status + */ + xmlFreeTextReader(reader); + if (ret != 0) { + printf("%s : failed to parse\n", filename); + } + } else { + fprintf(stderr, "Unable to open %s\n", filename); + } +} + +/************************************************************************ + * * + * Tree Test processing * * * ************************************************************************/ static void parseAndPrintFile(char *filename) { @@ -1106,12 +1176,13 @@ static void usage(const char *name) { printf("\t--loaddtd : fetch external DTD\n"); printf("\t--dtdattr : loaddtd + populate the tree with inherited attributes \n"); printf("\t--dropdtd : remove the DOCTYPE of the input docs\n"); + printf("\t--stream : use the streaming interface to process very large files\n"); printf("\nLibxml project home page: http://xmlsoft.org/\n"); printf("To report bugs or get some help check: http://xmlsoft.org/bugs.html\n"); } int main(int argc, char **argv) { - int i, count; + int i, acount; int files = 0; int version = 0; @@ -1277,6 +1348,10 @@ main(int argc, char **argv) { noblanks++; format++; xmlKeepBlanksDefault(0); + } + else if ((!strcmp(argv[i], "-stream")) || + (!strcmp(argv[i], "--stream"))) { + stream++; } else { fprintf(stderr, "Unknown option %s\n", argv[i]); usage(argv[0]); @@ -1338,10 +1413,17 @@ main(int argc, char **argv) { /* Remember file names. "-" means stdin. 
*/ if ((argv[i][0] != '-') || (strcmp(argv[i], "-") == 0)) { if (repeat) { - for (count = 0;count < 100 * repeat;count++) + for (acount = 0;acount < 100 * repeat;acount++) + if (stream != 0) + streamFile(argv[i]); + else + parseAndPrintFile(argv[i]); + } else { + if (stream != 0) + streamFile(argv[i]); + else parseAndPrintFile(argv[i]); - } else - parseAndPrintFile(argv[i]); + } files ++; if ((timing) && (repeat)) { endTimer("100 iterations"); diff --git a/xmlreader.c b/xmlreader.c index 5bf93553..159447a9 100644 --- a/xmlreader.c +++ b/xmlreader.c @@ -10,6 +10,14 @@ * daniel@veillard.com */ +/* + * TODOs: + * - provide an API to expand part of the tree + * - provide an API to preserve part of the tree + * - Streaming XInclude support + * - setting(s) for NoBlanks + * - performances and tuning ... + */ #define IN_LIBXML #include "libxml.h" @@ -357,8 +365,7 @@ xmlTextReaderPushData(xmlTextReaderPtr reader) { } else break; } - if ((inbuf->content[cur] == '>') || (inbuf->content[cur] == '&') || - (inbuf->content[cur] == ';')) { + if (inbuf->content[cur] == '>') { cur = cur + 1; val = xmlParseChunk(reader->ctxt, (const char *) &inbuf->content[reader->cur],