diff --git a/ChangeLog b/ChangeLog
index 37d8414f..85adc27a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+Wed Jul 7 09:28:43 CEST 1999 Daniel Veillard
+
+ * HTMLparser.[ch], HTMLtree.[ch]: more work for HTML parsing and
+ output.
+ * Makefile.am, test/HTML/*, result/HTML/*: added HTMLtests target
+
Wed Jul 7 00:25:42 CEST 1999 Daniel Veillard
* parser.h : Oops removed the binary compatibility problem
diff --git a/HTMLparser.c b/HTMLparser.c
index 119daa20..5259f6e8 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -34,7 +34,7 @@
#include "valid.h"
#include "parserInternals.h"
-#define DEBUG */
+/* #define DEBUG */
/************************************************************************
* *
@@ -351,7 +351,6 @@ htmlInitAutoClose(void) {
htmlElemDescPtr
htmlTagLookup(const CHAR *tag) {
int i = 0;
- int cnt;
for (i = 0; i < (sizeof(html40ElementTable) /
sizeof(html40ElementTable[0]));i++) {
@@ -408,7 +407,6 @@ htmlCheckAutoClose(const CHAR *new, const CHAR *old) {
*/
void
htmlAutoClose(htmlParserCtxtPtr ctxt, const CHAR *new) {
- const CHAR *old;
while ((ctxt->node != NULL) &&
(htmlCheckAutoClose(new, ctxt->node->name))) {
@@ -1933,7 +1931,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
atts = (const CHAR **) malloc(maxatts * sizeof(CHAR *));
if (atts == NULL) {
fprintf(stderr, "malloc of %ld byte failed\n",
- maxatts * sizeof(CHAR *));
+ maxatts * (long)sizeof(CHAR *));
return(NULL);
}
} else if (nbatts + 2 < maxatts) {
@@ -1941,7 +1939,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
atts = (const CHAR **) realloc(atts, maxatts * sizeof(CHAR *));
if (atts == NULL) {
fprintf(stderr, "realloc of %ld byte failed\n",
- maxatts * sizeof(CHAR *));
+ maxatts * (long)sizeof(CHAR *));
return(NULL);
}
}
diff --git a/HTMLparser.h b/HTMLparser.h
index 23ff9afc..749566ca 100644
--- a/HTMLparser.h
+++ b/HTMLparser.h
@@ -48,8 +48,10 @@ typedef struct htmlEntityDesc {
/*
* There is only few public functions.
*/
-htmlEntityDescPtr
-htmlParseEntityRef(htmlParserCtxtPtr ctxt, CHAR **str);
+htmlElemDescPtr htmlTagLookup(const CHAR *tag);
+htmlEntityDescPtr htmlEntityLookup(const CHAR *name);
+
+htmlEntityDescPtr htmlParseEntityRef(htmlParserCtxtPtr ctxt, CHAR **str);
int htmlParseCharRef(htmlParserCtxtPtr ctxt);
void htmlParseElement(htmlParserCtxtPtr ctxt);
diff --git a/HTMLtree.c b/HTMLtree.c
index a8e91da7..0d4b45f1 100644
--- a/HTMLtree.c
+++ b/HTMLtree.c
@@ -12,14 +12,11 @@
#include
#include <string.h> /* for memset() only ! */
-#include "tree.h"
+#include "HTMLparser.h"
+#include "HTMLtree.h"
#include "entities.h"
#include "valid.h"
-#define HTML_TEXT_NODE XML_TEXT_NODE
-#define HTML_ENTITY_REF_NODE XML_ENTITY_REF_NODE
-#define HTML_COMMENT_NODE XML_COMMENT_NODE
-
/**
* htmlDtdDump:
* @buf: the HTML buffer output
@@ -46,23 +43,6 @@ htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
xmlBufferWriteChar(buf, " SYSTEM ");
xmlBufferWriteQuotedString(buf, cur->SystemID);
}
- if ((cur->entities == NULL) && (cur->elements == NULL) &&
- (cur->attributes == NULL) && (cur->notations == NULL)) {
- xmlBufferWriteChar(buf, ">\n");
- return;
- }
- xmlBufferWriteChar(buf, " [\n");
- if (cur->entities != NULL)
- xmlDumpEntitiesTable(buf, (xmlEntitiesTablePtr) cur->entities);
- if (cur->notations != NULL)
- xmlDumpNotationTable(buf, (xmlNotationTablePtr) cur->notations);
- if (cur->elements != NULL)
- xmlDumpElementTable(buf, (xmlElementTablePtr) cur->elements);
- if (cur->attributes != NULL)
- xmlDumpAttributeTable(buf, (xmlAttributeTablePtr) cur->attributes);
- xmlBufferWriteChar(buf, "]");
-
- /* TODO !!! a lot more things to dump ... */
xmlBufferWriteChar(buf, ">\n");
}
@@ -116,30 +96,23 @@ htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
static void
-htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int level);
+htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
/**
* htmlNodeListDump:
* @buf: the HTML buffer output
* @doc: the document
* @cur: the first node
- * @level: the imbrication level for indenting
*
* Dump an HTML node list, recursive behaviour,children are printed too.
*/
static void
-htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int level) {
- int i;
-
+htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
if (cur == NULL) {
fprintf(stderr, "htmlNodeListDump : node == NULL\n");
return;
}
while (cur != NULL) {
- if ((cur->type != HTML_TEXT_NODE) &&
- (cur->type != HTML_ENTITY_REF_NODE)) {
- xmlBufferWriteChar(buf, "\n");
- }
- htmlNodeDump(buf, doc, cur, level);
+ htmlNodeDump(buf, doc, cur);
cur = cur->next;
}
}
@@ -149,22 +122,26 @@ htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int level) {
* @buf: the HTML buffer output
* @doc: the document
* @cur: the current node
- * @level: the imbrication level for indenting
*
* Dump an HTML node, recursive behaviour,children are printed too.
*/
static void
-htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int level) {
+htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
int i;
+ htmlElemDescPtr info;
if (cur == NULL) {
fprintf(stderr, "htmlNodeDump : node == NULL\n");
return;
}
+ /*
+ * Special cases.
+ */
if (cur->type == HTML_TEXT_NODE) {
if (cur->content != NULL) {
CHAR *buffer;
+ /* uses the HTML encoding routine !!!!!!!!!! */
buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
if (buffer != NULL) {
xmlBufferWriteCHAR(buf, buffer);
@@ -188,20 +165,38 @@ htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int level) {
return;
}
- xmlBufferWriteChar(buf, "<");
- if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
- xmlBufferWriteCHAR(buf, cur->ns->prefix);
- xmlBufferWriteChar(buf, ":");
- }
+ /*
+ * Get specific HTML info for that node.
+ */
+ info = htmlTagLookup(cur->name);
+ xmlBufferWriteChar(buf, "<");
xmlBufferWriteCHAR(buf, cur->name);
- if (cur->nsDef)
- xmlNsListDump(buf, cur->nsDef);
if (cur->properties != NULL)
htmlAttrListDump(buf, doc, cur->properties);
+ if (info->empty) {
+ xmlBufferWriteChar(buf, ">");
+ if (cur->next != NULL) {
+ if ((cur->next->type != HTML_TEXT_NODE) &&
+ (cur->next->type != HTML_ENTITY_REF_NODE))
+ xmlBufferWriteChar(buf, "\n");
+ }
+ return;
+ }
if ((cur->content == NULL) && (cur->childs == NULL)) {
- xmlBufferWriteChar(buf, "/>\n");
+ if (info->endTag != 0)
+ xmlBufferWriteChar(buf, ">");
+ else {
+ xmlBufferWriteChar(buf, "></");
+ xmlBufferWriteCHAR(buf, cur->name);
+ xmlBufferWriteChar(buf, ">");
+ }
+ if (cur->next != NULL) {
+ if ((cur->next->type != HTML_TEXT_NODE) &&
+ (cur->next->type != HTML_ENTITY_REF_NODE))
+ xmlBufferWriteChar(buf, "\n");
+ }
return;
}
xmlBufferWriteChar(buf, ">");
@@ -215,16 +210,22 @@ htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int level) {
}
}
if (cur->childs != NULL) {
- htmlNodeListDump(buf, doc, cur->childs, level + 1);
+ if ((cur->childs->type != HTML_TEXT_NODE) &&
+ (cur->childs->type != HTML_ENTITY_REF_NODE))
+ xmlBufferWriteChar(buf, "\n");
+ htmlNodeListDump(buf, doc, cur->childs);
+ if ((cur->last->type != HTML_TEXT_NODE) &&
+ (cur->last->type != HTML_ENTITY_REF_NODE))
+ xmlBufferWriteChar(buf, "\n");
}
xmlBufferWriteChar(buf, "</");
- if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
- xmlBufferWriteCHAR(buf, cur->ns->prefix);
- xmlBufferWriteChar(buf, ":");
- }
-
xmlBufferWriteCHAR(buf, cur->name);
- xmlBufferWriteChar(buf, ">\n");
+ xmlBufferWriteChar(buf, ">");
+ if (cur->next != NULL) {
+ if ((cur->next->type != HTML_TEXT_NODE) &&
+ (cur->next->type != HTML_ENTITY_REF_NODE))
+ xmlBufferWriteChar(buf, "\n");
+ }
}
/**
@@ -236,29 +237,12 @@ htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int level) {
*/
static void
htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
- xmlBufferWriteChar(buf, "<?xml version=");
- if (cur->version != NULL)
- xmlBufferWriteQuotedString(buf, cur->version);
- else
- xmlBufferWriteChar(buf, "\"1.0\"");
- if (cur->encoding != NULL) {
- xmlBufferWriteChar(buf, " encoding=");
- xmlBufferWriteQuotedString(buf, cur->encoding);
- }
- switch (cur->standalone) {
- case 0:
- xmlBufferWriteChar(buf, " standalone=\"no\"");
- break;
- case 1:
- xmlBufferWriteChar(buf, " standalone=\"yes\"");
- break;
- }
- xmlBufferWriteChar(buf, "?>\n");
if (cur->intSubset != NULL)
htmlDtdDump(buf, cur);
if (cur->root != NULL) {
- htmlNodeDump(buf, cur, cur->root, 0);
+ htmlNodeDump(buf, cur, cur->root);
}
+ xmlBufferWriteChar(buf, "\n");
}
/**
diff --git a/HTMLtree.h b/HTMLtree.h
new file mode 100644
index 00000000..09206dbb
--- /dev/null
+++ b/HTMLtree.h
@@ -0,0 +1,34 @@
+/*
+ * HTMLtree.h : describes the structures found in a tree resulting
+ * from an HTML parsing, and the routines used to dump or save it.
+ *
+ * See Copyright for the status of this software.
+ *
+ * Daniel.Veillard@w3.org
+ */
+
+#ifndef __HTML_TREE_H__
+#define __HTML_TREE_H__
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include "tree.h"
+
+#define HTML_TEXT_NODE XML_TEXT_NODE
+#define HTML_ENTITY_REF_NODE XML_ENTITY_REF_NODE
+#define HTML_COMMENT_NODE XML_COMMENT_NODE
+
+void htmlDocDumpMemory(xmlDocPtr cur, CHAR**mem, int *size);
+void htmlDocDump(FILE *f, xmlDocPtr cur);
+int htmlSaveFile(const char *filename, xmlDocPtr cur);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __HTML_TREE_H__ */
+
diff --git a/Makefile.am b/Makefile.am
index e06d4640..0f239c3c 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -61,7 +61,24 @@ check-local: tests
testall : tests SVGtests SAXtests
-tests : tester
+tests: HTMLtests XMLtests
+HTMLtests : testHTML
+ @(DIR=`pwd`; cd $(srcdir) ; \
+ for i in test/HTML/* ; do \
+ if [ ! -d $$i ] ; then \
+ if [ ! -f result/HTML/`basename $$i` ] ; then \
+ echo New test file `basename $$i` ; \
+ $$DIR/testHTML $$i > result/HTML/`basename $$i` ; \
+ else \
+ echo Testing `basename $$i` ; \
+ $$DIR/testHTML $$i > result.`basename $$i` ; \
+ diff result/HTML/`basename $$i` result.`basename $$i` ; \
+ $$DIR/testHTML result.`basename $$i` > result2.`basename $$i` ; \
+ diff result.`basename $$i` result2.`basename $$i` ; \
+ rm result.`basename $$i` result2.`basename $$i` ; \
+ fi ; fi ; done)
+
+XMLtests : tester
@(DIR=`pwd`; cd $(srcdir) ; \
for i in test/* ; do \
if [ ! -d $$i ] ; then \
diff --git a/include/libxml/HTMLparser.h b/include/libxml/HTMLparser.h
index 23ff9afc..749566ca 100644
--- a/include/libxml/HTMLparser.h
+++ b/include/libxml/HTMLparser.h
@@ -48,8 +48,10 @@ typedef struct htmlEntityDesc {
/*
* There is only few public functions.
*/
-htmlEntityDescPtr
-htmlParseEntityRef(htmlParserCtxtPtr ctxt, CHAR **str);
+htmlElemDescPtr htmlTagLookup(const CHAR *tag);
+htmlEntityDescPtr htmlEntityLookup(const CHAR *name);
+
+htmlEntityDescPtr htmlParseEntityRef(htmlParserCtxtPtr ctxt, CHAR **str);
int htmlParseCharRef(htmlParserCtxtPtr ctxt);
void htmlParseElement(htmlParserCtxtPtr ctxt);
diff --git a/include/libxml/HTMLtree.h b/include/libxml/HTMLtree.h
new file mode 100644
index 00000000..09206dbb
--- /dev/null
+++ b/include/libxml/HTMLtree.h
@@ -0,0 +1,34 @@
+/*
+ * HTMLtree.h : describes the structures found in a tree resulting
+ * from an HTML parsing, and the routines used to dump or save it.
+ *
+ * See Copyright for the status of this software.
+ *
+ * Daniel.Veillard@w3.org
+ */
+
+#ifndef __HTML_TREE_H__
+#define __HTML_TREE_H__
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include "tree.h"
+
+#define HTML_TEXT_NODE XML_TEXT_NODE
+#define HTML_ENTITY_REF_NODE XML_ENTITY_REF_NODE
+#define HTML_COMMENT_NODE XML_COMMENT_NODE
+
+void htmlDocDumpMemory(xmlDocPtr cur, CHAR**mem, int *size);
+void htmlDocDump(FILE *f, xmlDocPtr cur);
+int htmlSaveFile(const char *filename, xmlDocPtr cur);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __HTML_TREE_H__ */
+
diff --git a/result/HTML/Down.html b/result/HTML/Down.html
new file mode 100644
index 00000000..5504cd4e
--- /dev/null
+++ b/result/HTML/Down.html
@@ -0,0 +1,12 @@
+
+
+
+This service is temporary down
+
+
+Sorry, this service is temporary down
+We are doing our best to get it back on-line,
+
+The W3C system administrators
+
+
diff --git a/result/HTML/test2.html b/result/HTML/test2.html
new file mode 100644
index 00000000..c462702c
--- /dev/null
+++ b/result/HTML/test2.html
@@ -0,0 +1,41 @@
+
+
+
+Linux Today
+
+
+
+
+
+[ headlines |
+features |
+commercial |
+security |
+jobs |
+volt |
+contribute/submit |
+advertise |
+search |
+site digests |
+mailing lists |
+about us |
+link us ]
+
+
+
+
diff --git a/result/HTML/test3.html b/result/HTML/test3.html
new file mode 100644
index 00000000..0c47a2e8
--- /dev/null
+++ b/result/HTML/test3.html
@@ -0,0 +1,88 @@
+
+
+
+
+
+
+Component Package diagram ProblemDomain
+
+
+
+
+-
+Stereotype problem domain
+-
+Alias Problem Domain
+-
+Note
+
- The Problem Domain package is the model behind the Human
+
- Interface, thats stores and manipulates the Family Tree.
+
+
+
+
+
+
+
+-
+
+
+-
+
+
+-
+
+
+-
+
+
+-
+
+
+-
+
+
+-
+
+
+-
+
+
+-
+
+
+
+
+Links
+
+
+
+
+
+
+
+
diff --git a/test/HTML/Down.html b/test/HTML/Down.html
new file mode 100644
index 00000000..0f366479
--- /dev/null
+++ b/test/HTML/Down.html
@@ -0,0 +1,14 @@
+
+
+
+ This service is temporary down
+
+
+
+Sorry, this service is temporary down
+We are doing our best to get it back on-line,
+
+The W3C system administrators
+
+
diff --git a/test/HTML/test2.html b/test/HTML/test2.html
new file mode 100644
index 00000000..c8fd44c5
--- /dev/null
+++ b/test/HTML/test2.html
@@ -0,0 +1,33 @@
+
+ Linux Today
+
+
+
+
+
+[ headlines |
+features |
+commercial |
+security |
+jobs |
+volt |
+contribute/submit |
+advertise |
+search |
+site digests |
+mailing lists |
+about us |
+link us ]
+
+
+
+
diff --git a/test/HTML/test3.html b/test/HTML/test3.html
new file mode 100644
index 00000000..af1f1908
--- /dev/null
+++ b/test/HTML/test3.html
@@ -0,0 +1,34 @@
+
+
+
+
+Component Package diagram ProblemDomain
+
+
+- Stereotype problem domain
+- Alias Problem Domain
+- Note
- The Problem Domain package is the model behind the Human
+
- Interface, thats stores and manipulates the Family Tree.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Links
+
+
+
+
+
+
+
diff --git a/testHTML.c b/testHTML.c
index 4b214fac..27afe35e 100644
--- a/testHTML.c
+++ b/testHTML.c
@@ -27,7 +27,7 @@
#include
#include "HTMLparser.h"
-#include "tree.h"
+#include "HTMLtree.h"
#include "debugXML.h"
static int debug = 0;
@@ -80,7 +80,7 @@ void parseAndPrintFile(char *filename) {
* print it.
*/
if (!debug)
- xmlDocDump(stdout, doc);
+ htmlDocDump(stdout, doc);
else
xmlDebugDumpDocument(stdout, doc);
@@ -111,7 +111,7 @@ void parseAndPrintBuffer(CHAR *buf) {
* print it.
*/
if (!debug)
- xmlDocDump(stdout, doc);
+ htmlDocDump(stdout, doc);
else
xmlDebugDumpDocument(stdout, doc);