diff --git a/ChangeLog b/ChangeLog index 050af7e0..61c12630 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,19 @@ +Tue Jun 22 23:46:32 CEST 1999 Daniel Veillard + + * released 1.3.0 with xmlEncodeEntities restoring old behaviour + and xmlEncodeEntitiesReentrant with the correct one :-\ + +Mon Jun 21 14:07:53 CEST 1999 Daniel Veillard + + * commit of my internal XML base changes, quite a lot of + changes, cleanups, better entities support, framework for + new I/O and charset detection and handling + * Fixed the configure/Makefile stuff to generate shared libs + with the proper version info, so we jumped on rev from + 0.0.0 to 1.2.0 ! The binary interfaces have been broken, + xmlEncodeEntities() result need to be freed now, and a string + xmlParserVersion provide the current library version. + Tue Jun 15 14:24:19 1999 Raph Levien * parser.c: fixed a buffer overrun for when you have a very long diff --git a/Makefile.am b/Makefile.am index 75e75342..ed6baaa4 100644 --- a/Makefile.am +++ b/Makefile.am @@ -2,7 +2,9 @@ SUBDIRS = doc -INCLUDES = -I@srcdir@ @CORBA_CFLAGS@ +INCLUDES = -I@srcdir@ @CORBA_CFLAGS@ $(VERSION_FLAGS) + +VERSION_FLAGS = -DLIBXML_VERSION=\"@LIBXML_VERSION@\" noinst_PROGRAMS=tester testSAX @@ -10,6 +12,8 @@ bin_SCRIPTS=xml-config lib_LTLIBRARIES = libxml.la +libxml_la_LDFLAGS = -version-info @LIBXML_VERSION_INFO@ + libxml_la_SOURCES = \ SAX.c \ entities.c \ @@ -29,6 +33,7 @@ xmlinc_HEADERS = \ debugXML.h \ xml-error.h \ tree.h \ + xmlIO.h \ valid.h DEPS = $(top_builddir)/libxml.la diff --git a/SAX.c b/SAX.c index bd0caf59..d1897a45 100644 --- a/SAX.c +++ b/SAX.c @@ -808,10 +808,13 @@ void comment(void *ctx, const CHAR *value) { xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlNodePtr ret; + #ifdef DEBUG_SAX fprintf(stderr, "SAX.comment(%s)\n", value); #endif - xmlNewDocComment(ctxt->myDoc, value); + ret = xmlNewDocComment(ctxt->myDoc, value); + xmlAddChild(ctxt->node, ret); } xmlSAXHandler xmlDefaultSAXHandler = { diff --git a/configure.in 
b/configure.in index 59e2d58d..67eeaf03 100644 --- a/configure.in +++ b/configure.in @@ -2,7 +2,22 @@ dnl Process this file with autoconf to produce a configure script. AC_PREREQ(2.2) AC_INIT(entities.h) AM_CONFIG_HEADER(config.h) -AM_INIT_AUTOMAKE(libxml, 1.1.0) + +LIBXML_MAJOR_VERSION=1 +LIBXML_MINOR_VERSION=3 +LIBXML_MICRO_VERSION=0 +LIBXML_VERSION=$LIBXML_MAJOR_VERSION.$LIBXML_MINOR_VERSION.$LIBXML_MICRO_VERSION +LIBXML_VERSION_INFO=`expr $LIBXML_MAJOR_VERSION + $LIBXML_MINOR_VERSION`:$LIBXML_MICRO_VERSION:$LIBXML_MINOR_VERSION + +AC_SUBST(LIBXML_MAJOR_VERSION) +AC_SUBST(LIBXML_MINOR_VERSION) +AC_SUBST(LIBXML_MICRO_VERSION) +AC_SUBST(LIBXML_VERSION) +AC_SUBST(LIBXML_VERSION_INFO) + +VERSION=$LIBXML_VERSION + +AM_INIT_AUTOMAKE(libxml, $VERSION) AC_ARG_WITH(html-dir, [ --with-html-dir=PATH path to installed docs ]) diff --git a/doc/html/book1.html b/doc/html/book1.html index d3b9d007..8f8a6084 100644 --- a/doc/html/book1.html +++ b/doc/html/book1.html @@ -15,13 +15,30 @@ TEXT="#000000" CLASS="BOOK" >
Table of Contents
1. Gnome XML Library
parser — one line description goes here. —
tree — one line description goes here.
entities — one line description goes here.
valid — one line description goes here.
xml-error — one line description goes here.
parserInternals — one line description goes here.
Gnome XML Library Reference Manual
Gnome XML Library Reference Manual
Prev<<< Previous PageHomeUpNextNext Page >>>

entities

Name

entities — one line description goes here.
entities —

Synopsis

xmlEntityPtr +>; #define XML_MIN_ENTITIES_TABLExmlEntitiesTablePtr +>; void xmlAddDocEntity

Description










xmlAddDocEntity()

xmlAddDocEntity ()

xmlAddDtdEntity()

xmlAddDtdEntity ()

xmlGetPredefinedEntity()

xmlGetPredefinedEntity ()

xmlGetDocEntity()

xmlGetDocEntity ()

xmlGetDtdEntity()

xmlGetDtdEntity ()

xmlEncodeEntities()

xmlEncodeEntities ()

xmlCreateEntitiesTable()

xmlCreateEntitiesTable ()

xmlCopyEntitiesTable()

xmlCopyEntitiesTable ()

xmlFreeEntitiesTable()

xmlFreeEntitiesTable ()

xmlDumpEntitiesTable()

xmlDumpEntitiesTable ()


valid
Prev<<< Previous PageHomeHomeUpNextNext Page >>>
treeUptreevalid
Gnome XML Library Reference Manual
Gnome XML Library Reference Manual
Prev<<< Previous PageHomeUpNextNext Page >>>

parser

Name

parser — one line description goes here.
parser —
SynopsisxmlParserInputPtr +>; typedef xmlParserNodeInfo +>; typedef xmlParserNodeInfoSeq +>; typedef xmlParserNodeInfoSeqPtr +>; typedef xmlParserCtxt +>; typedef xmlParserCtxtPtr +>; typedef xmlSAXLocator +>; typedef xmlSAXLocatorPtr +>; xmlParserInputPtrxmlSAXHandlerPtr +>; +extern xmlSAXLocator xmlDefaultSAXLocator; +extern xmlSAXHandler xmlDefaultSAXHandler; CHAR

Description


xmlParserInputDeallocate()

xmlParserInputDeallocate ()









resolveEntitySAXFunc()

resolveEntitySAXFunc ()

internalSubsetSAXFunc()

internalSubsetSAXFunc ()

getEntitySAXFunc()

getEntitySAXFunc ()

entityDeclSAXFunc()

entityDeclSAXFunc ()

notationDeclSAXFunc()

notationDeclSAXFunc ()

attributeDeclSAXFunc()

attributeDeclSAXFunc ()

elementDeclSAXFunc()

elementDeclSAXFunc ()

unparsedEntityDeclSAXFunc()

unparsedEntityDeclSAXFunc ()

setDocumentLocatorSAXFunc()

setDocumentLocatorSAXFunc ()

startDocumentSAXFunc()

startDocumentSAXFunc ()

endDocumentSAXFunc()

endDocumentSAXFunc ()

startElementSAXFunc()

startElementSAXFunc ()

endElementSAXFunc()

endElementSAXFunc ()

attributeSAXFunc()

attributeSAXFunc ()

referenceSAXFunc()

referenceSAXFunc ()

charactersSAXFunc()

charactersSAXFunc ()

ignorableWhitespaceSAXFunc()

ignorableWhitespaceSAXFunc ()

processingInstructionSAXFunc()

processingInstructionSAXFunc ()

commentSAXFunc()

commentSAXFunc ()

warningSAXFunc()

warningSAXFunc ()

errorSAXFunc()

errorSAXFunc ()

fatalErrorSAXFunc()

fatalErrorSAXFunc ()

isStandaloneSAXFunc()

isStandaloneSAXFunc ()

hasInternalSubsetSAXFunc()

hasInternalSubsetSAXFunc ()

hasExternalSubsetSAXFunc()

hasExternalSubsetSAXFunc ()


xmlDefaultSAXLocator

extern xmlSAXLocator xmlDefaultSAXLocator;


xmlDefaultSAXHandler

extern xmlSAXHandler xmlDefaultSAXHandler;


xmlStrdup()

xmlStrdup ()

xmlStrndup()

xmlStrndup ()

xmlStrchr()

xmlStrchr ()

xmlStrcmp()

xmlStrcmp ()

xmlStrncmp()

xmlStrncmp ()

xmlStrlen()

xmlStrlen ()

xmlStrcat()

xmlStrcat ()

xmlStrncat()

xmlStrncat ()

xmlParseDoc()

xmlParseDoc ()

xmlParseMemory()

xmlParseMemory ()

xmlParseFile()

xmlParseFile ()

xmlSubstituteEntitiesDefault()

xmlSubstituteEntitiesDefault ()

xmlRecoverDoc()

xmlRecoverDoc ()

xmlRecoverMemory()

xmlRecoverMemory ()

xmlRecoverFile()

xmlRecoverFile ()

xmlParseDocument()

xmlParseDocument ()

xmlSAXParseDoc()

xmlSAXParseDoc ()

xmlSAXParseMemory()

xmlSAXParseMemory ()

xmlSAXParseFile()

xmlSAXParseFile ()

xmlParseDTD()

xmlParseDTD ()

xmlSAXParseDTD()

xmlSAXParseDTD ()

xmlInitParserCtxt()

xmlInitParserCtxt ()

xmlClearParserCtxt()

xmlClearParserCtxt ()

xmlSetupParserForBuffer()

xmlSetupParserForBuffer ()

xmlParserFindNodeInfo()

xmlParserFindNodeInfo ()

xmlInitNodeInfoSeq()

xmlInitNodeInfoSeq ()

xmlClearNodeInfoSeq()

xmlClearNodeInfoSeq ()

xmlParserFindNodeInfoIndex()

xmlParserFindNodeInfoIndex ()

xmlParserAddNodeInfo()

xmlParserAddNodeInfo ()

xmlDefaultSAXHandlerInit()

xmlDefaultSAXHandlerInit ()


tree
Prev<<< Previous PageHomeHomeUpNextNext Page >>>
Gnome XML LibraryUpGnome XML Librarytree
Gnome XML Library Reference Manual
Gnome XML Library Reference Manual
Prev<<< Previous PageHomeUp 

parserInternals

Name

parserInternals — one line description goes here.
parserInternals —

Synopsis

CHAR **value); -void CHAR* xmlParseStartTag ( (xmlParserCtxtPtr ctxt); +> ctxt, + CHAR *tagname); void xmlParseCDSectCHAR end3); end3); +int nodePush (xmlParserCtxtPtr ctxt, + xmlNodePtr value); +xmlNodePtrnodePop (xmlParserCtxtPtr ctxt); +int inputPush (xmlParserCtxtPtr ctxt, + xmlParserInputPtr value); +xmlParserInputPtrinputPop (xmlParserCtxtPtr ctxt);

Description

Details


IS_CHAR()







#define IS_IDEOGRAPHIC(c)						\
-    ((((c) >= 0x4e00) && ((c) <= 0x9fa5)) ||				\
-     (((c) >= 0xf900) && ((c) <= 0xfa2d)) ||				\
-     (((c) >= 0x3021) && ((c) <= 0x3029)) ||				\
-      ((c) == 0x3007))
#define IS_IDEOGRAPHIC(c)


#define IS_PUBIDCHAR(c)							\
-    (((c) == 0x20) || ((c) == 0x0D) || ((c) == 0x0A) ||			\
-     (((c) >= 'a') && ((c) <= 'z')) ||					\
-     (((c) >= 'A') && ((c) <= 'Z')) ||					\
-     (((c) >= '0') && ((c) <= '9')) ||					\
-     ((c) == '-') || ((c) == '\'') || ((c) == '(') || ((c) == ')') ||	\
-     ((c) == '+') || ((c) == ',') || ((c) == '.') || ((c) == '/') ||	\
-     ((c) == ':') || ((c) == '=') || ((c) == '?') || ((c) == ';') ||	\
-     ((c) == '!') || ((c) == '*') || ((c) == '#') || ((c) == '@') ||	\
-     ((c) == '$') || ((c) == '_') || ((c) == '%'))
#define IS_PUBIDCHAR(c)

#define SKIP_EOL(p) 							\
-    if (*(p) == 0x13) { p++ ; if (*(p) == 0x10) p++; }			\
-    if (*(p) == 0x10) { p++ ; if (*(p) == 0x13) p++; }
#define SKIP_EOL(p)

#define MOVETO_ENDTAG(p)						\
-    while (IS_CHAR(*p) && (*(p) != '>')) (p)++
#define MOVETO_ENDTAG(p)

#define MOVETO_STARTTAG(p)						\
-    while (IS_CHAR(*p) && (*(p) != '<')) (p)++
#define MOVETO_STARTTAG(p)

xmlCreateDocParserCtxt()

xmlCreateDocParserCtxt ()

xmlCreateFileParserCtxt()

xmlCreateFileParserCtxt ()

xmlCreateMemoryParserCtxt()

xmlCreateMemoryParserCtxt ()

xmlFreeParserCtxt()

xmlFreeParserCtxt ()

xmlHandleEntity()

xmlHandleEntity ()

xmlNewEntityInputStream()

xmlNewEntityInputStream ()

xmlPushInput()

xmlPushInput ()

xmlPopInput()

xmlPopInput ()

xmlFreeInputStream()

xmlFreeInputStream ()

xmlNewInputFromFile()

xmlNewInputFromFile ()

xmlSplitQName()

xmlSplitQName ()

xmlNamespaceParseNCName()

xmlNamespaceParseNCName ()

xmlNamespaceParseQName()

xmlNamespaceParseQName ()

xmlNamespaceParseNSDef()

xmlNamespaceParseNSDef ()

xmlParseQuotedString()

xmlParseQuotedString ()

xmlParseNamespace()

xmlParseNamespace ()

xmlParseName()

xmlParseName ()

xmlParseNmtoken()

xmlParseNmtoken ()

xmlParseEntityValue()

xmlParseEntityValue ()

xmlParseAttValue()

xmlParseAttValue ()

xmlParseSystemLiteral()

xmlParseSystemLiteral ()

xmlParsePubidLiteral()

xmlParsePubidLiteral ()

xmlParseCharData()

xmlParseCharData ()

xmlParseExternalID()

xmlParseExternalID ()

xmlParseComment()

xmlParseComment ()

xmlParsePITarget()

xmlParsePITarget ()

xmlParsePI()

xmlParsePI ()

xmlParseNotationDecl()

xmlParseNotationDecl ()

xmlParseEntityDecl()

xmlParseEntityDecl ()

xmlParseDefaultDecl()

xmlParseDefaultDecl ()

xmlParseNotationType()

xmlParseNotationType ()

xmlParseEnumerationType()

xmlParseEnumerationType ()

xmlParseEnumeratedType()

xmlParseEnumeratedType ()

xmlParseAttributeType()

xmlParseAttributeType ()

xmlParseAttributeListDecl()

xmlParseAttributeListDecl ()

xmlParseElementMixedContentDecl()

xmlParseElementMixedContentDecl ()

xmlParseElementChildrenContentDecl()

xmlParseElementChildrenContentDecl ()

xmlParseElementContentDecl()

xmlParseElementContentDecl ()

xmlParseElementDecl()

xmlParseElementDecl ()

xmlParseMarkupDecl()

xmlParseMarkupDecl ()

xmlParseCharRef()

xmlParseCharRef ()

xmlParseEntityRef()

xmlParseEntityRef ()

xmlParseReference()

xmlParseReference ()

xmlParsePEReference()

xmlParsePEReference ()

xmlParseDocTypeDecl()

xmlParseDocTypeDecl ()

xmlParseAttribute()

xmlParseAttribute ()

xmlParseStartTag()

xmlParseStartTag ()
void        xmlParseStartTag                (CHAR*       xmlParseStartTag                (xmlParserCtxtPtr ctxt);
an XML parser context
Returns :the element name parsed


xmlParseEndTag()

xmlParseEndTag ()void xmlParseEndTag (xmlParserCtxtPtr ctxt); ctxt, + CHAR *tagname);
an XML parser context
tagname : the tag name as parsed in the opening tag.


xmlParseCDSect()

xmlParseCDSect ()

xmlParseContent()

xmlParseContent ()

xmlParseElement()

xmlParseElement ()

xmlParseVersionNum()

xmlParseVersionNum ()

xmlParseVersionInfo()

xmlParseVersionInfo ()

xmlParseEncName()

xmlParseEncName ()

xmlParseEncodingDecl()

xmlParseEncodingDecl ()

xmlParseSDDecl()

xmlParseSDDecl ()

xmlParseXMLDecl()

xmlParseXMLDecl ()

xmlParseMisc()

xmlParseMisc ()





xmlDecodeEntities()

xmlDecodeEntities ()



nodePush ()

Prev
int         nodePush                        (xmlParserCtxtPtr ctxt,
+                                             xmlNodePtr value);

ctxt :value :
Home 
xml-error 
Returns : 


Up

nodePop ()

xmlNodePtr  nodePop                         (xmlParserCtxtPtr ctxt);

ctxt : 
Returns : 


inputPush ()

int         inputPush                       (xmlParserCtxtPtr ctxt,
+                                             xmlParserInputPtr value);

ctxt : 
value : 
Returns : 


inputPop ()

xmlParserInputPtr inputPop                  (xmlParserCtxtPtr ctxt);

ctxt : 
Returns : 



<<< Previous PageHomeUp 
xml-error 
Gnome XML Library Reference Manual
Gnome XML Library Reference Manual
Prev<<< Previous PageHomeUpNextNext Page >>>

tree

Name

tree — one line description goes here.
tree —

Synopsis

xmlElementType +>; typedef CHAR +>; typedef xmlNotationPtr +>; enum xmlAttributeType +>; enum xmlAttributeDefault +>; typedef xmlEnumerationPtr +>; typedef xmlAttributePtr +>; enum xmlElementContentType +>; enum xmlElementContentOccur +>; typedef xmlElementContentPtr +>; enum xmlElementTypeVal +>; typedef xmlElementPtr +>; enum xmlNsType +>; typedef xmlNsPtr +>; typedef xmlDtdPtr +>; typedef xmlAttrPtr +>; typedef xmlNode +>; typedef xmlNodePtr +>; typedef xmlDoc +>; typedef xmlDocPtr +>; typedef xmlBuffer +>; typedef xmlBufferPtr +>; +extern xmlNsPtr baseDTD; +extern int oldXMLWDcompatibility; +extern int xmlIndentTreeOutput; xmlBufferPtr

Description

Details


xmlElementType

enum xmlElementType


xmlAttributeType

enum xmlAttributeType

xmlAttributeDefault

enum xmlAttributeDefault



xmlElementContentType

enum xmlElementContentType

xmlElementContentOccur

enum xmlElementContentOccur


xmlElementTypeVal

enum xmlElementTypeVal


xmlNsType

enum xmlNsType










baseDTD

extern xmlNsPtr baseDTD;


oldXMLWDcompatibility

extern int oldXMLWDcompatibility;/* maintain compatibility with old WD */


xmlIndentTreeOutput

extern int xmlIndentTreeOutput;  /* try to indent the tree dumps */


xmlBufferCreate()

xmlBufferCreate ()

xmlBufferFree()

xmlBufferFree ()

xmlBufferDump()

xmlBufferDump ()

xmlBufferAdd()

xmlBufferAdd ()

xmlBufferCat()

xmlBufferCat ()

xmlBufferCCat()

xmlBufferCCat ()

xmlCreateIntSubset()

xmlCreateIntSubset ()

xmlNewDtd()

xmlNewDtd ()

xmlFreeDtd()

xmlFreeDtd ()

xmlNewGlobalNs()

xmlNewGlobalNs ()

xmlNewNs()

xmlNewNs ()

xmlFreeNs()

xmlFreeNs ()

xmlNewDoc()

xmlNewDoc ()

xmlFreeDoc()

xmlFreeDoc ()

xmlNewDocProp()

xmlNewDocProp ()

xmlNewProp()

xmlNewProp ()

xmlFreePropList()

xmlFreePropList ()

xmlFreeProp()

xmlFreeProp ()

xmlCopyProp()

xmlCopyProp ()

xmlCopyPropList()

xmlCopyPropList ()

xmlCopyDtd()

xmlCopyDtd ()

xmlCopyDoc()

xmlCopyDoc ()

xmlNewDocNode()

xmlNewDocNode ()

xmlNewNode()

xmlNewNode ()

xmlNewChild()

xmlNewChild ()

xmlNewDocText()

xmlNewDocText ()

xmlNewText()

xmlNewText ()

xmlNewDocTextLen()

xmlNewDocTextLen ()

xmlNewTextLen()

xmlNewTextLen ()

xmlNewDocComment()

xmlNewDocComment ()

xmlNewComment()

xmlNewComment ()

xmlNewReference()

xmlNewReference ()

xmlCopyNode()

xmlCopyNode ()

xmlCopyNodeList()

xmlCopyNodeList ()

xmlGetLastChild()

xmlGetLastChild ()

xmlNodeIsText()

xmlNodeIsText ()

xmlAddChild()

xmlAddChild ()

xmlUnlinkNode()

xmlUnlinkNode ()

xmlTextMerge()

xmlTextMerge ()

xmlTextConcat()

xmlTextConcat ()

xmlFreeNodeList()

xmlFreeNodeList ()

xmlFreeNode()

xmlFreeNode ()

xmlSearchNs()

xmlSearchNs ()

xmlSearchNsByHref()

xmlSearchNsByHref ()

xmlSetNs()

xmlSetNs ()

xmlCopyNamespace()

xmlCopyNamespace ()

xmlCopyNamespaceList()

xmlCopyNamespaceList ()

xmlSetProp()

xmlSetProp ()

xmlGetProp()

xmlGetProp ()

xmlStringGetNodeList()

xmlStringGetNodeList ()

xmlStringLenGetNodeList()

xmlStringLenGetNodeList ()

xmlNodeListGetString()

xmlNodeListGetString ()

xmlNodeSetContent()

xmlNodeSetContent ()

xmlNodeSetContentLen()

xmlNodeSetContentLen ()

xmlNodeAddContent()

xmlNodeAddContent ()

xmlNodeAddContentLen()

xmlNodeAddContentLen ()

xmlNodeGetContent()

xmlNodeGetContent ()

xmlBufferWriteCHAR()

xmlBufferWriteCHAR ()

xmlBufferWriteChar()

xmlBufferWriteChar ()

xmlBufferWriteQuotedString()

xmlBufferWriteQuotedString ()

xmlDocDumpMemory()

xmlDocDumpMemory ()

xmlDocDump()

xmlDocDump ()

xmlSaveFile()

xmlSaveFile ()

xmlGetDocCompressMode()

xmlGetDocCompressMode ()

xmlSetDocCompressMode()

xmlSetDocCompressMode ()

xmlGetCompressMode()

xmlGetCompressMode ()

xmlSetCompressMode()

xmlSetCompressMode ()


entities
Prev<<< Previous PageHomeHomeUpNextNext Page >>>
parserUpparserentities
Gnome XML Library Reference Manual
Gnome XML Library Reference Manual
Prev<<< Previous PageHomeUpNextNext Page >>>

valid

Name

valid — one line description goes here.
valid —

Synopsis

xmlNotationTablePtr +>; #define XML_MIN_ELEMENT_TABLExmlElementTablePtr +>; #define XML_MIN_ATTRIBUTE_TABLExmlAttributeTablePtr +>; xmlNotationPtr

Description







xmlAddNotationDecl()

xmlAddNotationDecl ()

xmlCopyNotationTable()

xmlCopyNotationTable ()

xmlFreeNotationTable()

xmlFreeNotationTable ()

xmlDumpNotationTable()

xmlDumpNotationTable ()

xmlNewElementContent()

xmlNewElementContent ()

xmlCopyElementContent()

xmlCopyElementContent ()

xmlFreeElementContent()

xmlFreeElementContent ()

xmlAddElementDecl()

xmlAddElementDecl ()

xmlCopyElementTable()

xmlCopyElementTable ()

xmlFreeElementTable()

xmlFreeElementTable ()

xmlDumpElementTable()

xmlDumpElementTable ()

xmlCreateEnumeration()

xmlCreateEnumeration ()

xmlFreeEnumeration()

xmlFreeEnumeration ()

xmlCopyEnumeration()

xmlCopyEnumeration ()

xmlAddAttributeDecl()

xmlAddAttributeDecl ()

xmlCopyAttributeTable()

xmlCopyAttributeTable ()

xmlFreeAttributeTable()

xmlFreeAttributeTable ()

xmlDumpAttributeTable()

xmlDumpAttributeTable ()


xml-error
Prev<<< Previous PageHomeHomeUpNextNext Page >>>
entitiesUpentitiesxml-error
+ + @@ -88,6 +90,9 @@ + + + @@ -279,3 +284,7 @@ + + + + diff --git a/doc/html/libxml.html b/doc/html/libxml.html index fade2b17..8abd7922 100644 --- a/doc/html/libxml.html +++ b/doc/html/libxml.html @@ -22,87 +22,89 @@ CLASS="NAVHEADER" >Gnome XML Library Reference Manual
Gnome XML Library Reference Manual
Prev<<< Previous PageHome NextNext Page >>>

Chapter 1. Gnome XML LibraryGnome XML Library

Table of Contents
parser — one line description goes here.
tree — one line description goes here.
entities — one line description goes here.
valid — one line description goes here.
xml-error — one line description goes here.
parserInternals — one line description goes here.
The parser general interfaces Manipulation the tree generated by the parser Routines for handling entities @@ -112,55 +114,94 @@ HREF="gnome-xml-parserinternals.html"
DOM interfaces.

XML is a standard for markup based structured documents, here is an example:

<?xml version="1.0"?>
-<EXAMPLE prop1="gnome is great" prop2="&linux; too">
+<EXAMPLE prop1="gnome is great" prop2="&amp; linux too">
   <head>
    <title>Welcome to Gnome</title>
   </head>
@@ -36,7 +36,7 @@ The first line specify that it's an XML document and gives useful informations
 about it's encoding. Then the document is a text format whose structure is
 specified by tags between brackets. Each tag opened have to be
 closed XML is pedantic about this, not that for example the image
-tage has no content (just an attribute) and is closed by ending up the tag
+tag has no content (just an attribute) and is closed by ending up the tag
 with />.

The tree output

@@ -285,7 +285,213 @@ presents on other programs like this:

This should help greatly doing things like modifying a gnumeric spreadsheet embedded in a GWP document for example.

+ +

A real example

+Here is a real size example, where the actual content of the application data +is not kept in the DOM tree but uses internal structures. It is based on +a proposal to keep a database of jobs related to Gnome, with an XML based +storage structure. Here is an XML encoded jobs base: +

+<?xml version="1.0"?>
+<gjob:Helping xmlns:gjob="http://www.gnome.org/some-location">
+  <gjob:Jobs>
+
+    <gjob:Job>
+      <gjob:Project ID="3"/>
+      <gjob:Application>GBackup</gjob:Application>
+      <gjob:Category>Development</gjob:Category>
+
+      <gjob:Update>
+	<gjob:Status>Open</gjob:Status>
+	<gjob:Modified>Mon, 07 Jun 1999 20:27:45 -0400 MET DST</gjob:Modified>
+        <gjob:Salary>USD 0.00</gjob:Salary>
+      </gjob:Update>
+
+      <gjob:Developers>
+        <gjob:Developer>
+        </gjob:Developer>
+      </gjob:Developers>
+
+      <gjob:Contact>
+        <gjob:Person>Nathan Clemons</gjob:Person>
+	<gjob:Email>nathan@windsofstorm.net</gjob:Email>
+        <gjob:Company>
+	</gjob:Company>
+        <gjob:Organisation>
+	</gjob:Organisation>
+        <gjob:Webpage>
+	</gjob:Webpage>
+	<gjob:Snailmail>
+	</gjob:Snailmail>
+	<gjob:Phone>
+	</gjob:Phone>
+      </gjob:Contact>
+
+      <gjob:Requirements>
+      The program should be released as free software, under the GPL.
+      </gjob:Requirements>
+
+      <gjob:Skills>
+      </gjob:Skills>
+
+      <gjob:Details>
+      A GNOME based system that will allow a superuser to configure 
+      compressed and uncompressed files and/or file systems to be backed 
+      up with a supported media in the system.  This should be able to 
+      perform via find commands generating a list of files that are passed 
+      to tar, dd, cpio, cp, gzip, etc., to be directed to the tape machine 
+      or via operations performed on the filesystem itself. Email 
+      notification and GUI status display very important.
+      </gjob:Details>
+
+    </gjob:Job>
+
+  </gjob:Jobs>
+</gjob:Helping>
+
+
+

+While loading the XML file into an internal DOM tree is a matter of calling +only a couple of functions, browsing the tree to gather the information +and generate the internal structures is harder, and more error prone.

+

+The suggested principle is to be tolerant with respect to the input +structure. For example the ordering of the attributes is not significant, +the XML specification is clear about it. It's also usually a good idea +to not depend on the order of the children of a given node, unless it +really makes things harder. Here is some code to parse the information +for a person: +

+
+/*
+ * A person record: one string per contact detail.
+ * In this example parsePerson() sets only name and email; the other
+ * fields keep the zero value written by its memset().
+ * NOTE(review): smail presumably holds the gjob:Snailmail text - confirm.
+ */
+typedef struct person {
+    char *name;
+    char *email;
+    char *company;
+    char *organisation;
+    char *smail;
+    char *webPage;
+    char *phone;
+} person, *personPtr;
+
+/*
+ * And the code needed to parse it
+ *
+ * parsePerson: build a person record from the children of a node.
+ * doc: document handed to xmlNodeListGetString() for each text lookup
+ * ns:  namespace a child must carry (pointer equality) to be considered
+ * cur: node whose children are scanned; its own name is not checked
+ *
+ * Returns the malloc()ed, zero-filled record with name and email set from
+ * the "Person" and "Email" children when present, or NULL (after printing
+ * "out of memory" on stderr) if malloc() fails.
+ */
+personPtr parsePerson(xmlDocPtr doc, xmlNsPtr ns, xmlNodePtr cur) {
+    personPtr ret = NULL;
+
+DEBUG("parsePerson\n");
+    /*
+     * allocate the struct; it is zero-filled below so that every field
+     * not found in the tree is left at zero
+     */
+    ret = (personPtr) malloc(sizeof(person));
+    if (ret == NULL) {
+        fprintf(stderr,"out of memory\n");
+	return(NULL);
+    }
+    memset(ret, 0, sizeof(person));
+
+    /* We don't care what the top level element name is, only its children are examined */
+    cur = cur->childs;
+    while (cur != NULL) {
+        if ((!strcmp(cur->name, "Person")) && (cur->ns == ns))
+	    ret->name = xmlNodeListGetString(doc, cur->childs, 1);
+        if ((!strcmp(cur->name, "Email")) && (cur->ns == ns))
+	    ret->email = xmlNodeListGetString(doc, cur->childs, 1);
+	cur = cur->next;
+    }
+
+    return(ret);
+}
+
+

+Here are a couple of things to notice:

+
    +
  • Usually a recursive parsing style is the more convenient one, +XML data being by nature subject to repetitive constructs and usually exhibiting +highly structured patterns. +
  • The two arguments of type xmlDocPtr and xmlNsPtr, i.e. +the pointer to the global XML document and the namespace reserved to the +application. Document wide information is needed for example to decode +entities and it's a good coding practice to define a namespace for your +application's set of data and test that the elements and attributes you're +analyzing actually pertain to your application space. This is done by a simple +equality test (cur->ns == ns). +
  • To retrieve text and attribute values, it is suggested to use +the function xmlNodeListGetString to gather all the text and +entity reference nodes generated by the DOM output and produce a +single text string. +
+

+Here is another piece of code used to parse another level of the structure: +

+
+/*
+ * a Description for a Job
+ * In this example parseJob() sets projectID, application, category and
+ * contact; nbDevelopers and developers[] keep the zero value written by
+ * its memset().
+ */
+typedef struct job {
+    char *projectID;
+    char *application;
+    char *category;
+    personPtr contact;
+    int nbDevelopers;
+    personPtr developers[100]; /* using dynamic alloc is left as an exercise */
+} job, *jobPtr;
+
+/*
+ * And the code needed to parse it
+ *
+ * parseJob: build a job record from the children of a node.
+ * doc: document handed to xmlNodeListGetString() and parsePerson()
+ * ns:  namespace a child must carry (pointer equality) to be considered
+ * cur: node whose children are scanned; its own name is not checked
+ *
+ * Returns the malloc()ed, zero-filled record filled from the "Project",
+ * "Application", "Category" and "Contact" children when present, or NULL
+ * (after printing "out of memory" on stderr) if malloc() fails.
+ */
+jobPtr parseJob(xmlDocPtr doc, xmlNsPtr ns, xmlNodePtr cur) {
+    jobPtr ret = NULL;
+
+DEBUG("parseJob\n");
+    /*
+     * allocate the struct; it is zero-filled below so that every field
+     * not found in the tree is left at zero
+     */
+    ret = (jobPtr) malloc(sizeof(job));
+    if (ret == NULL) {
+        fprintf(stderr,"out of memory\n");
+	return(NULL);
+    }
+    memset(ret, 0, sizeof(job));
+
+    /* We don't care what the top level element name is, only its children are examined */
+    cur = cur->childs;
+    while (cur != NULL) {
+        
+        if ((!strcmp(cur->name, "Project")) && (cur->ns == ns)) {
+	    /* the project is identified by its ID attribute, not by text content */
+	    ret->projectID = xmlGetProp(cur, "ID");
+	    if (ret->projectID == NULL) {
+		fprintf(stderr, "Project has no ID\n");
+	    }
+	}
+        if ((!strcmp(cur->name, "Application")) && (cur->ns == ns))
+	    ret->application = xmlNodeListGetString(doc, cur->childs, 1);
+        if ((!strcmp(cur->name, "Category")) && (cur->ns == ns))
+	    ret->category = xmlNodeListGetString(doc, cur->childs, 1);
+        /* the Contact subtree is delegated to the person parser */
+        if ((!strcmp(cur->name, "Contact")) && (cur->ns == ns))
+	    ret->contact = parsePerson(doc, ns, cur);
+	cur = cur->next;
+    }
+
+    return(ret);
+}
+
+

+One can notice that once used to it, writing this kind of code +is quite simple, but boring. Ultimately, it could be possible to write +stubbers taking either C data structure definitions, a set of XML examples +or an XML DTD and producing the code needed to import and export the +content between C data and XML storage. This is left as an exercise to +the reader :-)

+

+Feel free to use the code for the full C parsing +example as a template, + +Daniel Veillard diff --git a/encoding.c b/encoding.c index a9a5fa76..2184f61a 100644 --- a/encoding.c +++ b/encoding.c @@ -12,7 +12,7 @@ * [US-ASCII] Coded Character Set--7-bit American Standard Code for * Information Interchange, ANSI X3.4-1986. * - * Original code from "Martin J. Duerst" + * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" * * See Copyright for the status of this software. * @@ -20,6 +20,8 @@ */ #include +#include +#include #include "encoding.h" /* @@ -311,3 +313,170 @@ xmlParseCharEncoding(const char* name) if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP); return(XML_CHAR_ENCODING_ERROR); } + +/**************************************************************** + * * + * Char encoding handlers * + * * + ****************************************************************/ + +/* the size should be growable, but it's not a big deal ... */ +#define MAX_ENCODING_HANDLERS 50 +static xmlCharEncodingHandlerPtr *handlers = NULL; +static int nbCharEncodingHandler = 0; + +/* + * The default is UTF-8 for XML, that's also the default used for the + * parser internals, so the default encoding handler is NULL + */ + +static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL; + +/** + * xmlNewCharEncodingHandler: + * @name: the encoding name, in UTF-8 format (ASCII actually) + * @input: the xmlCharEncodingInputFunc to read that encoding + * @output: the xmlCharEncodingOutputFunc to write that encoding + * + * Create and register an xmlCharEncodingHandler; the name is stored as an upper-cased copy truncated to 499 chars. + * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error). + */ +xmlCharEncodingHandlerPtr +xmlNewCharEncodingHandler(const char *name, xmlCharEncodingInputFunc input, + xmlCharEncodingOutputFunc output) { + xmlCharEncodingHandlerPtr handler; + char upper[500]; + int i; + char *up = 0; + + /* + * Keep only the uppercase version of the encoding (chars go through unsigned char: toupper() is undefined for negative values). 
+ */ + if (name == NULL) { + fprintf(stderr, "xmlNewCharEncodingHandler : no name !\n"); + return(NULL); + } + for (i = 0;i < 499;i++) { + upper[i] = toupper((unsigned char) name[i]); + if (upper[i] == 0) break; + } + upper[i] = 0; + up = strdup(upper); + if (up == NULL) { + fprintf(stderr, "xmlNewCharEncodingHandler : out of memory !\n"); + return(NULL); + } + + /* + * allocate and fill-up an handler block. + */ + handler = (xmlCharEncodingHandlerPtr) + malloc(sizeof(xmlCharEncodingHandler)); + if (handler == NULL) { + free(up); /* do not leak the upper-cased name copy */ + fprintf(stderr, "xmlNewCharEncodingHandler : out of memory !\n"); + return(NULL); + } + handler->input = input; + handler->output = output; + handler->name = up; + + /* + * registers and returns the handler. + */ + xmlRegisterCharEncodingHandler(handler); + return(handler); +} + +/** + * xmlInitCharEncodingHandlers: + * + * Initialize the char encoding support, it registers the default + * encoding supported. + * NOTE: while public this function usually doesn't need to be called + * in normal processing. + */ +void +xmlInitCharEncodingHandlers(void) { + if (handlers != NULL) return; + + handlers = (xmlCharEncodingHandlerPtr *) + malloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr)); + + if (handlers == NULL) { + fprintf(stderr, "xmlInitCharEncodingHandlers : out of memory !\n"); + return; + } + xmlNewCharEncodingHandler("UTF-8", NULL, NULL); + xmlNewCharEncodingHandler("UTF-16", UTF16ToUTF8, UTF8ToUTF16); + xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1); +} + +/** + * xmlRegisterCharEncodingHandler: + * @handler: the xmlCharEncodingHandlerPtr handler block + * + * Register the char encoding handler, surprising, isn't it ? 
+ */ +void +xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) { + if (handlers == NULL) xmlInitCharEncodingHandlers(); + if (handler == NULL) { + fprintf(stderr, "xmlRegisterCharEncodingHandler: NULL handler !\n"); + return; + } + if (handlers == NULL) return; /* table allocation failed (already reported): nothing to index */ + + if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) { + fprintf(stderr, + "xmlRegisterCharEncodingHandler: Too many handler registered\n"); + fprintf(stderr, "\tincrease MAX_ENCODING_HANDLERS : %s\n", __FILE__); + return; + } + handlers[nbCharEncodingHandler++] = handler; +} + +/** + * xmlGetCharEncodingHandler: + * @enc: an xmlCharEncoding value. + * + * Search in the registered set the handler able to read/write that encoding. + * + * Returns the handler or NULL if not found (the lookup is not implemented yet, so this is always NULL) + */ +xmlCharEncodingHandlerPtr +xmlGetCharEncodingHandler(xmlCharEncoding enc) { + if (handlers == NULL) xmlInitCharEncodingHandlers(); + return(NULL); +} + +/** + * xmlFindCharEncodingHandler: + * @name: a string describing the char encoding. + * + * Search in the registered set the handler able to read/write that encoding. 
 + * + * Returns the handler, the default handler for a NULL or empty @name, or NULL if not found + */ +xmlCharEncodingHandlerPtr +xmlFindCharEncodingHandler(const char *name) { + char upper[500]; + int i; + + if (handlers == NULL) xmlInitCharEncodingHandlers(); + if (name == NULL) return(xmlDefaultCharEncodingHandler); + if (name[0] == 0) return(xmlDefaultCharEncodingHandler); + + for (i = 0;i < 499;i++) { + upper[i] = toupper((unsigned char) name[i]); + if (upper[i] == 0) break; + } + upper[i] = 0; + + for (i = 0;i < nbCharEncodingHandler; i++) + if (!strcmp(upper, handlers[i]->name)) /* compare the upper-cased copy: xmlNewCharEncodingHandler() stores names upper-cased */ + return(handlers[i]); + + return(NULL); +} + diff --git a/encoding.h b/encoding.h index 3c5cee04..227abaf0 100644 --- a/encoding.h +++ b/encoding.h @@ -25,6 +25,9 @@ extern "C" { #endif +/** + * Predefined values for some standard encodings + */ typedef enum { XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */ XML_CHAR_ENCODING_NONE= 0, /* No char encoding detected */ @@ -51,8 +54,57 @@ typedef enum { XML_CHAR_ENCODING_EUC_JP= 21,/* EUC-JP */ } xmlCharEncoding; -extern xmlCharEncoding xmlDetectCharEncoding(const unsigned char* in); -extern xmlCharEncoding xmlParseCharEncoding(const char* name); +/** + * xmlCharEncodingInputFunc: + * @out: a pointer to an array of bytes to store the UTF-8 result + * @outlen: the length of @out + * @in: a pointer to an array of chars in the original encoding + * @inlen: the length of @in + * + * Take a block of chars in the original encoding and try to convert + * it to an UTF-8 block of chars out. + * + * Returns the number of bytes written, or -1 by lack of space. 
+ */ +typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen, + unsigned char* in, int inlen); + + +/** + * xmlCharEncodingOutputFunc: + * @out: a pointer to an array of bytes to store the result + * @outlen: the length of @out + * @in: a pointer to an array of UTF-8 chars + * @inlen: the length of @in + * + * Take a block of UTF-8 chars in and try to convert it to an other + * encoding. 
+ * + * Returns the number of byte written, or -1 by lack of space, or -2 + * if the transcoding failed. + */ +typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int outlen, + unsigned char* in, int inlen); + +/* + * Block defining the handlers for non UTF-8 encodings. + */ + +typedef struct xmlCharEncodingHandler { + char *name; + xmlCharEncodingInputFunc input; + xmlCharEncodingOutputFunc output; +} xmlCharEncodingHandler; +typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr; + +void xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler); +xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler(xmlCharEncoding enc); +xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name); + +xmlCharEncoding xmlDetectCharEncoding(const unsigned char* in); +xmlCharEncoding xmlParseCharEncoding(const char* name); + +void xmlInitCharEncodingHandlers(void); #ifdef __cplusplus } diff --git a/entities.c b/entities.c index c61f81db..15d9c71b 100644 --- a/entities.c +++ b/entities.c @@ -29,19 +29,6 @@ struct xmlPredefinedEntityValue xmlPredefinedEntityValues[] = { xmlEntitiesTablePtr xmlPredefinedEntities = NULL; -/* - * Macro used to grow the current buffer. - */ -#define growBuffer() { \ - buffer_size *= 2; \ - buffer = (CHAR *) realloc(buffer, buffer_size * sizeof(CHAR)); \ - if (buffer == NULL) { \ - perror("realloc failed"); \ - exit(1); \ - } \ -} - - /* * xmlFreeEntity : clean-up an entity record. */ @@ -301,6 +288,25 @@ xmlGetDocEntity(xmlDocPtr doc, const CHAR *name) { (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \ (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF))) +/* + * A buffer used for converting entities to their equivalent and back. + * + * TODO: remove this, this helps performances but forbid reentrancy in a + * stupid way. 
+ */ +static int buffer_size = 0; +static CHAR *buffer = NULL; + +void growBuffer(void) { + buffer_size *= 2; + buffer = (CHAR *) realloc(buffer, buffer_size * sizeof(CHAR)); + if (buffer == NULL) { + perror("realloc failed"); + exit(1); + } +} + + /** * xmlEncodeEntities: * @doc: the document containing the string @@ -312,10 +318,142 @@ xmlGetDocEntity(xmlDocPtr doc, const CHAR *name) { * TODO !!!! Once moved to UTF-8 internal encoding, the encoding of non-ascii * get erroneous. * + * TODO This routine is not reentrant, the interface + * should not be modified though. + * + * People must migrate their code to xmlEncodeEntitiesReentrant ! + * + * Returns A newly allocated string with the substitution done. + */ +const CHAR * +xmlEncodeEntities(xmlDocPtr doc, const CHAR *input) { + const CHAR *cur = input; + CHAR *out = buffer; + + if (input == NULL) return(NULL); + if (buffer == NULL) { + buffer_size = 1000; + buffer = (CHAR *) malloc(buffer_size * sizeof(CHAR)); + if (buffer == NULL) { + perror("malloc failed"); + exit(1); + } + out = buffer; + } + while (*cur != '\0') { + if (out - buffer > buffer_size - 100) { + int index = out - buffer; + + growBuffer(); + out = &buffer[index]; + } + + /* + * By default one have to encode at least '<', '>', '"' and '&' ! + */ + if (*cur == '<') { + *out++ = '&'; + *out++ = 'l'; + *out++ = 't'; + *out++ = ';'; + } else if (*cur == '>') { + *out++ = '&'; + *out++ = 'g'; + *out++ = 't'; + *out++ = ';'; + } else if (*cur == '&') { + *out++ = '&'; + *out++ = 'a'; + *out++ = 'm'; + *out++ = 'p'; + *out++ = ';'; + } else if (*cur == '"') { + *out++ = '&'; + *out++ = 'q'; + *out++ = 'u'; + *out++ = 'o'; + *out++ = 't'; + *out++ = ';'; + } else if (*cur == '\'') { + *out++ = '&'; + *out++ = 'a'; + *out++ = 'p'; + *out++ = 'o'; + *out++ = 's'; + *out++ = ';'; + } else if (((*cur >= 0x20) && (*cur < 0x80)) || + (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) { + /* + * default case, just copy ! 
+ */ + *out++ = *cur; +#ifndef USE_UTF_8 + } else if ((sizeof(CHAR) == 1) && (*cur >= 0x80)) { + char buf[10], *ptr; +#ifdef HAVE_SNPRINTF + snprintf(buf, 9, "&#%d;", *cur); +#else + sprintf(buf, "&#%d;", *cur); +#endif + ptr = buf; + while (*ptr != 0) *out++ = *ptr++; +#endif + } else if (IS_CHAR(*cur)) { + char buf[10], *ptr; + +#ifdef HAVE_SNPRINTF + snprintf(buf, 9, "&#%d;", *cur); +#else + sprintf(buf, "&#%d;", *cur); +#endif + ptr = buf; + while (*ptr != 0) *out++ = *ptr++; + } +#if 0 + else { + /* + * default case, this is not a valid char ! + * Skip it... + */ + fprintf(stderr, "xmlEncodeEntities: invalid char %d\n", (int) *cur); + } +#endif + cur++; + } + *out++ = 0; + return(buffer); +} + +/* + * Macro used to grow the current buffer. + */ +#define growBufferReentrant() { \ + buffer_size *= 2; \ + buffer = (CHAR *) realloc(buffer, buffer_size * sizeof(CHAR)); \ + if (buffer == NULL) { \ + perror("realloc failed"); \ + exit(1); \ + } \ +} + + +/** + * xmlEncodeEntitiesReentrant: + * @doc: the document containing the string + * @input: A string to convert to XML. + * + * Do a global encoding of a string, replacing the predefined entities + * and non ASCII values with their entities and CharRef counterparts. + * Contrary to xmlEncodeEntities, this routine is reentrant, and result + * must be deallocated. + * + * TODO !!!! Once moved to UTF-8 internal encoding, the encoding of non-ascii + * get erroneous. + * * Returns A newly allocated string with the substitution done. 
*/ CHAR * -xmlEncodeEntities(xmlDocPtr doc, const CHAR *input) { +xmlEncodeEntitiesReentrant(xmlDocPtr doc, const CHAR *input) { const CHAR *cur = input; CHAR *buffer = NULL; CHAR *out = NULL; @@ -338,7 +476,7 @@ xmlEncodeEntities(xmlDocPtr doc, const CHAR *input) { if (out - buffer > buffer_size - 100) { int index = out - buffer; - growBuffer(); + growBufferReentrant(); out = &buffer[index]; } diff --git a/entities.h b/entities.h index b6535421..86040574 100644 --- a/entities.h +++ b/entities.h @@ -66,7 +66,8 @@ void xmlAddDtdEntity(xmlDocPtr doc, const CHAR *name, int type, xmlEntityPtr xmlGetPredefinedEntity(const CHAR *name); xmlEntityPtr xmlGetDocEntity(xmlDocPtr doc, const CHAR *name); xmlEntityPtr xmlGetDtdEntity(xmlDocPtr doc, const CHAR *name); -CHAR *xmlEncodeEntities(xmlDocPtr doc, const CHAR *input); +const CHAR *xmlEncodeEntities(xmlDocPtr doc, const CHAR *input); +CHAR *xmlEncodeEntitiesReentrant(xmlDocPtr doc, const CHAR *input); xmlEntitiesTablePtr xmlCreateEntitiesTable(void); xmlEntitiesTablePtr xmlCopyEntitiesTable(xmlEntitiesTablePtr table); void xmlFreeEntitiesTable(xmlEntitiesTablePtr table); diff --git a/include/libxml/encoding.h b/include/libxml/encoding.h index 3c5cee04..227abaf0 100644 --- a/include/libxml/encoding.h +++ b/include/libxml/encoding.h @@ -25,6 +25,9 @@ extern "C" { #endif +/** + * Predefined values for some standard encodings + */ typedef enum { XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */ XML_CHAR_ENCODING_NONE= 0, /* No char encoding detected */ @@ -51,8 +54,57 @@ typedef enum { XML_CHAR_ENCODING_EUC_JP= 21,/* EUC-JP */ } xmlCharEncoding; -extern xmlCharEncoding xmlDetectCharEncoding(const unsigned char* in); -extern xmlCharEncoding xmlParseCharEncoding(const char* name); +/** + * xmlCharEncodingInputFunc: + * @out: a pointer ot an array of bytes to store the UTF-8 result + * @outlen: the lenght of @out + * @in: a pointer ot an array of chars in the original encoding + * @inlen: the lenght of @in + * + * 
Take a block of chars in the original encoding and try to convert + * it to an UTF-8 block of chars out. + * + * Returns the number of byte written, or -1 by lack of space. + */ +typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen, + unsigned char* in, int inlen); + + +/** + * xmlCharEncodingInputFunc: + * @out: a pointer ot an array of bytes to store the result + * @outlen: the lenght of @out + * @in: a pointer ot an array of UTF-8 chars + * @inlen: the lenght of @in + * + * Take a block of UTF-8 chars in and try to convert it to an other + * encoding. + * + * Returns the number of byte written, or -1 by lack of space, or -2 + * if the transcoding failed. + */ +typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int outlen, + unsigned char* in, int inlen); + +/* + * Block defining the handlers for non UTF-8 encodings. + */ + +typedef struct xmlCharEncodingHandler { + char *name; + xmlCharEncodingInputFunc input; + xmlCharEncodingOutputFunc output; +} xmlCharEncodingHandler; +typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr; + +void xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler); +xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler(xmlCharEncoding enc); +xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name); + +xmlCharEncoding xmlDetectCharEncoding(const unsigned char* in); +xmlCharEncoding xmlParseCharEncoding(const char* name); + +void xmlInitCharEncodingHandlers(void); #ifdef __cplusplus } diff --git a/include/libxml/entities.h b/include/libxml/entities.h index b6535421..86040574 100644 --- a/include/libxml/entities.h +++ b/include/libxml/entities.h @@ -66,7 +66,8 @@ void xmlAddDtdEntity(xmlDocPtr doc, const CHAR *name, int type, xmlEntityPtr xmlGetPredefinedEntity(const CHAR *name); xmlEntityPtr xmlGetDocEntity(xmlDocPtr doc, const CHAR *name); xmlEntityPtr xmlGetDtdEntity(xmlDocPtr doc, const CHAR *name); -CHAR *xmlEncodeEntities(xmlDocPtr doc, const CHAR *input); +const CHAR 
*xmlEncodeEntities(xmlDocPtr doc, const CHAR *input); +CHAR *xmlEncodeEntitiesReentrant(xmlDocPtr doc, const CHAR *input); xmlEntitiesTablePtr xmlCreateEntitiesTable(void); xmlEntitiesTablePtr xmlCopyEntitiesTable(xmlEntitiesTablePtr table); void xmlFreeEntitiesTable(xmlEntitiesTablePtr table); diff --git a/include/libxml/parser.h b/include/libxml/parser.h index aaab58c4..0b8d22fe 100644 --- a/include/libxml/parser.h +++ b/include/libxml/parser.h @@ -10,6 +10,7 @@ #define __XML_PARSER_H__ #include "tree.h" +#include "xmlIO.h" #ifdef __cplusplus extern "C" { @@ -22,6 +23,9 @@ extern "C" { typedef void (* xmlParserInputDeallocate)(CHAR *); typedef struct xmlParserInput { + /* Input buffer */ + xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */ + const char *filename; /* The file analyzed, if any */ const CHAR *base; /* Base of the array to parse */ const CHAR *cur; /* Current char being parsed */ @@ -169,8 +173,10 @@ typedef xmlSAXHandler *xmlSAXHandlerPtr; /* * Global variables: just the SAX interface tables we are looking for full - * reentrancy of the code ! + * reentrancy of the code and version infos. 
*/ +extern const char *xmlParserVersion; + extern xmlSAXLocator xmlDefaultSAXLocator; extern xmlSAXHandler xmlDefaultSAXHandler; diff --git a/include/libxml/parserInternals.h b/include/libxml/parserInternals.h index 6a31c512..b1077188 100644 --- a/include/libxml/parserInternals.h +++ b/include/libxml/parserInternals.h @@ -614,10 +614,10 @@ void xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt); CHAR * xmlParseAttribute(xmlParserCtxtPtr ctxt, CHAR **value); -void +CHAR * xmlParseStartTag(xmlParserCtxtPtr ctxt); void -xmlParseEndTag(xmlParserCtxtPtr ctxt); +xmlParseEndTag(xmlParserCtxtPtr ctxt, CHAR *tagname); void xmlParseCDSect(xmlParserCtxtPtr ctxt); void diff --git a/include/libxml/xmlIO.h b/include/libxml/xmlIO.h new file mode 100644 index 00000000..d6edcae2 --- /dev/null +++ b/include/libxml/xmlIO.h @@ -0,0 +1,40 @@ +/* + * xmlIO.h : interface for the I/O interfaces used by the parser + * + * See Copyright for the status of this software. + * + * Daniel.Veillard@w3.org + */ + +#ifndef __XML_IO_H__ +#define __XML_IO_H__ + +#include +#include "tree.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct xmlParserInputBuffer { + /* Inputs */ + FILE *file; /* Input on file handler */ + int fd; /* Input on a file descriptor */ +/********** +#ifdef HAVE_ZLIB_H + gzFile gzfile; Input on a compressed stream +#endif + */ + + + xmlBufferPtr buffer; /* Local buffer encoded in UTF-8 */ + +} xmlParserInputBuffer; + +typedef xmlParserInputBuffer *xmlParserInputBufferPtr; + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_IO_H__ */ diff --git a/parser.c b/parser.c index 7a627edf..e551b050 100644 --- a/parser.c +++ b/parser.c @@ -34,6 +34,8 @@ #include "valid.h" #include "parserInternals.h" +const char *xmlParserVersion = LIBXML_VERSION; + /************************************************************************ * * * Parser stacks related functions and macros * @@ -2845,6 +2847,9 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) { ctxt->sax->elementDecl(ctxt->userData, name, 
ret, content); } + if (content != NULL) { + xmlFreeElementContent(content); + } if (name != NULL) { free(name); } @@ -3162,7 +3167,7 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { */ xmlEntityPtr xmlParseEntityRef(xmlParserCtxtPtr ctxt) { - const CHAR *q; + const CHAR *q; /* !!!!!!!!!!! Unused !!!!!!!!!! */ CHAR *name; xmlEntityPtr ent = NULL; @@ -3482,9 +3487,11 @@ xmlParseAttribute(xmlParserCtxtPtr ctxt, CHAR **value) { * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' * * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' + * + * Returns the element name parsed */ -void +CHAR * xmlParseStartTag(xmlParserCtxtPtr ctxt) { CHAR *name; CHAR *attname; @@ -3494,7 +3501,7 @@ xmlParseStartTag(xmlParserCtxtPtr ctxt) { int maxatts = 0; int i; - if (CUR != '<') return; + if (CUR != '<') return(NULL); NEXT; name = xmlParseName(ctxt); @@ -3503,7 +3510,7 @@ xmlParseStartTag(xmlParserCtxtPtr ctxt) { ctxt->sax->error(ctxt->userData, "xmlParseStartTag: invalid element name\n"); ctxt->wellFormed = 0; - return; + return(NULL); } /* @@ -3543,7 +3550,7 @@ xmlParseStartTag(xmlParserCtxtPtr ctxt) { if (atts == NULL) { fprintf(stderr, "malloc of %d byte failed\n", maxatts * sizeof(CHAR *)); - return; + return(NULL); } } else if (nbatts + 2 < maxatts) { maxatts *= 2; @@ -3551,7 +3558,7 @@ xmlParseStartTag(xmlParserCtxtPtr ctxt) { if (atts == NULL) { fprintf(stderr, "realloc of %d byte failed\n", maxatts * sizeof(CHAR *)); - return; + return(NULL); } } atts[nbatts++] = attname; @@ -3576,16 +3583,17 @@ xmlParseStartTag(xmlParserCtxtPtr ctxt) { if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) ctxt->sax->startElement(ctxt->userData, name, atts); - free(name); if (atts != NULL) { for (i = 0;i < nbatts;i++) free((CHAR *) atts[i]); free(atts); } + return(name); } /** * xmlParseEndTag: * @ctxt: an XML parser context + * @tagname: the tag name as parsed in the opening tag. 
* * parse an end of tag * @@ -3597,7 +3605,7 @@ xmlParseStartTag(xmlParserCtxtPtr ctxt) { */ void -xmlParseEndTag(xmlParserCtxtPtr ctxt) { +xmlParseEndTag(xmlParserCtxtPtr ctxt, CHAR *tagname) { CHAR *name; if ((CUR != '<') || (NXT(1) != '/')) { @@ -3621,6 +3629,16 @@ xmlParseEndTag(xmlParserCtxtPtr ctxt) { } else NEXT; + /* + * Well formedness constraints, opening and closing must match. + */ + if (xmlStrcmp(name, tagname)) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Opening and ending tag mismatch: %s and %s\n", tagname, name); + ctxt->wellFormed = 0; + } + /* * SAX: End of Tag */ @@ -3792,13 +3810,17 @@ xmlParseContent(xmlParserCtxtPtr ctxt) { void xmlParseElement(xmlParserCtxtPtr ctxt) { const CHAR *openTag = CUR_PTR; + CHAR *name; xmlParserNodeInfo node_info; /* Capture start position */ node_info.begin_pos = CUR_PTR - ctxt->input->base; node_info.begin_line = ctxt->input->line; - xmlParseStartTag(ctxt); + name = xmlParseStartTag(ctxt); + if (name == NULL) { + return; + } /* * Check for an Empty Element. @@ -3806,7 +3828,8 @@ xmlParseElement(xmlParserCtxtPtr ctxt) { if ((CUR == '/') && (NXT(1) == '>')) { SKIP(2); if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) - ctxt->sax->endElement(ctxt->userData, NULL); + ctxt->sax->endElement(ctxt->userData, name); + free(name); return; } if (CUR == '>') NEXT; @@ -3818,10 +3841,9 @@ xmlParseElement(xmlParserCtxtPtr ctxt) { /* * end of parsing of this node. - * TODO !!!!!!!! check the macro in case of non DOM parsing */ nodePop(ctxt); - + free(name); return; } @@ -3837,17 +3859,17 @@ xmlParseElement(xmlParserCtxtPtr ctxt) { /* * end of parsing of this node. - * TODO !!!!!!!! 
check the macro in case of non DOM parsing */ nodePop(ctxt); - + free(name); return; } /* * parse the end of tag: ' 14/12 gauge double butted spoke - + black anodised low torsion hub - + twin wall, eyeletted rim - + 8 speed, wide ratio gearing - + double cross lacing of 32 spokes - + Front wheel The front wheel provides grip, steering and some shock absorption - + + diff --git a/result/SVG/defs.xml b/result/SVG/defs.xml index ba485729..0ceee0bd 100644 --- a/result/SVG/defs.xml +++ b/result/SVG/defs.xml @@ -9,4 +9,5 @@ Defining things for later use - + diff --git a/result/SVG/desc.xml b/result/SVG/desc.xml index ed598b9f..7f65d2ae 100644 --- a/result/SVG/desc.xml +++ b/result/SVG/desc.xml @@ -9,5 +9,5 @@ This is a bar chart which shows company sales by region. - + diff --git a/result/SVG/gradient.xml b/result/SVG/gradient.xml index 1905b603..63f97f7c 100644 --- a/result/SVG/gradient.xml +++ b/result/SVG/gradient.xml @@ -6,7 +6,7 @@ - + diff --git a/result/SVG/marker.xml b/result/SVG/marker.xml index 013c5cd0..f99b977f 100644 --- a/result/SVG/marker.xml +++ b/result/SVG/marker.xml @@ -9,11 +9,12 @@ An double-headed arrow example using markers - + - + - + diff --git a/result/SVG/mathswitch.xml b/result/SVG/mathswitch.xml index 8f4e539b..f7a95ebc 100644 --- a/result/SVG/mathswitch.xml +++ b/result/SVG/mathswitch.xml @@ -4,10 +4,17 @@ fallback graphical representation of an equation, if MathML is not supported. 
- - - - + + + + + Formula goes here diff --git a/result/SVG/parentns.xml b/result/SVG/parentns.xml index b204637e..0b8ecdcd 100644 --- a/result/SVG/parentns.xml +++ b/result/SVG/parentns.xml @@ -1,6 +1,6 @@ - + - - + + diff --git a/result/SVG/patternfill.xml b/result/SVG/patternfill.xml index 425b361b..0a425fd7 100644 --- a/result/SVG/patternfill.xml +++ b/result/SVG/patternfill.xml @@ -2,7 +2,9 @@ - + + diff --git a/result/SVG/private.xml b/result/SVG/private.xml index e69fc46c..e492a25d 100644 --- a/result/SVG/private.xml +++ b/result/SVG/private.xml @@ -7,9 +7,10 @@ - + This chart includes private data in another namespace - + diff --git a/result/SVG/richdesc.xml b/result/SVG/richdesc.xml index e2a15d97..eee8fa33 100644 --- a/result/SVG/richdesc.xml +++ b/result/SVG/richdesc.xml @@ -7,5 +7,6 @@ mydoc namespace. - + + diff --git a/result/SVG/structure01.xml b/result/SVG/structure01.xml index a60174a7..bdac7423 100644 --- a/result/SVG/structure01.xml +++ b/result/SVG/structure01.xml @@ -1,6 +1,6 @@ - + - + diff --git a/result/SVG/switch.xml b/result/SVG/switch.xml index 383a47f5..124e6139 100644 --- a/result/SVG/switch.xml +++ b/result/SVG/switch.xml @@ -1,9 +1,13 @@ - - - + + + diff --git a/result/SVG/symbol-use.xml b/result/SVG/symbol-use.xml index 9265a666..f6e97a7b 100644 --- a/result/SVG/symbol-use.xml +++ b/result/SVG/symbol-use.xml @@ -2,13 +2,15 @@ - + + Examples of inline and referenced content - - - + + + + - + diff --git a/result/SVG/transform.xml b/result/SVG/transform.xml index 65712acc..0f456429 100644 --- a/result/SVG/transform.xml +++ b/result/SVG/transform.xml @@ -3,10 +3,14 @@ Demonstration of coordinate transforms - This prints 12 pixels high. + This prints 12 pixels high. This prints 12 pixels high. - - This prints 24 pixels high. - This prints 12 pixels high. + + This prints 24 pixels high. + This prints 12 pixels high. 
diff --git a/result/SVG/trivial.xml b/result/SVG/trivial.xml index 2ff9a379..558c7bf2 100644 --- a/result/SVG/trivial.xml +++ b/result/SVG/trivial.xml @@ -1,2 +1,3 @@ - + + diff --git a/result/SVG/viewport-nest.xml b/result/SVG/viewport-nest.xml index c7426351..7ce61a97 100644 --- a/result/SVG/viewport-nest.xml +++ b/result/SVG/viewport-nest.xml @@ -4,5 +4,7 @@ This SVG drawing embeds another one, thus establishing a new viewport - + + diff --git a/result/SVG/viewport-transform.xml b/result/SVG/viewport-transform.xml index 5acbc4c0..f1c14658 100644 --- a/result/SVG/viewport-transform.xml +++ b/result/SVG/viewport-transform.xml @@ -3,15 +3,26 @@ Transformation with establishment of a new viewport - This prints 12 pixels high. + This prints 12 pixels high. This prints 12 pixels high. - - This prints 24 pixels high. - This prints 12 pixels high. + + This prints 24 pixels high. + This prints 12 pixels high. - - - This prints 36 pixels high. + + + This prints 36 pixels high. This prints 36 pixels high. 
diff --git a/result/SVG/viewport.xml b/result/SVG/viewport.xml index 26bb86c2..2847a2fb 100644 --- a/result/SVG/viewport.xml +++ b/result/SVG/viewport.xml @@ -1,6 +1,6 @@ - + - + diff --git a/testSAX.c b/testSAX.c index 8d646915..1e7acb1b 100644 --- a/testSAX.c +++ b/testSAX.c @@ -177,8 +177,16 @@ internalSubsetDebug(xmlParserCtxtPtr ctxt, const CHAR *name, xmlParserInputPtr resolveEntityDebug(xmlParserCtxtPtr ctxt, const CHAR *publicId, const CHAR *systemId) { - fprintf(stdout, "SAX.resolveEntity(%s, %s)\n", - (char *)publicId, (char *)systemId); + + fprintf(stdout, "SAX.resolveEntity("); + if (publicId != NULL) + fprintf(stdout, "%s", (char *)publicId); + else + fprintf(stdout, " "); + if (systemId != NULL) + fprintf(stdout, ", %s)\n", (char *)systemId); + else + fprintf(stdout, ", )\n"); if (systemId != NULL) { return(xmlNewInputFromFile(ctxt, systemId)); } diff --git a/tester.c b/tester.c index f2a7cf17..5836fc66 100644 --- a/tester.c +++ b/tester.c @@ -197,10 +197,13 @@ int main(int argc, char **argv) { } } if (files == 0) { - printf("\nFirst test for the parser, with errors\n"); - parseAndPrintBuffer(buffer); - printf("\nBuilding a tree from scratch and printing it\n"); - treeTest(); + printf("Usage : %s [--debug] [--copy] [--recover] [--noent] XMLfiles ...\n", + argv[0]); + printf("\tParse the XML files and output the result of the parsing\n"); + printf("\t--debug : dump a debug tree of the in-memory document\n"); + printf("\t--copy : used to test the internal copy implementation\n"); + printf("\t--recover : output what is parsable on broken XmL documents\n"); + printf("\t--noent : substitute entity references by their value\n"); } return(0); diff --git a/tree.c b/tree.c index 617264cd..7442f0a7 100644 --- a/tree.c +++ b/tree.c @@ -673,8 +673,15 @@ xmlNodeListGetString(xmlDocPtr doc, xmlNodePtr list, int inLine) { if (node->type == XML_TEXT_NODE) { if (inLine) ret = xmlStrcat(ret, node->content); - else - ret = xmlStrcat(ret, xmlEncodeEntities(doc, 
node->content)); + else { + CHAR *buffer; + + buffer = xmlEncodeEntitiesReentrant(doc, node->content); + if (buffer != NULL) { + ret = xmlStrcat(ret, buffer); + free(buffer); + } + } } else if (node->type == XML_ENTITY_REF_NODE) { if (inLine) { ent = xmlGetDocEntity(doc, node->name); @@ -2566,8 +2573,15 @@ xmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int level) { return; } if (cur->type == XML_TEXT_NODE) { - if (cur->content != NULL) - xmlBufferWriteCHAR(buf, xmlEncodeEntities(doc, cur->content)); + if (cur->content != NULL) { + CHAR *buffer; + + buffer = xmlEncodeEntitiesReentrant(doc, cur->content); + if (buffer != NULL) { + xmlBufferWriteCHAR(buf, buffer); + free(buffer); + } + } return; } if (cur->type == XML_COMMENT_NODE) { @@ -2605,8 +2619,15 @@ xmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int level) { return; } xmlBufferWriteChar(buf, ">"); - if (cur->content != NULL) - xmlBufferWriteCHAR(buf, xmlEncodeEntities(doc, cur->content)); + if (cur->content != NULL) { + CHAR *buffer; + + buffer = xmlEncodeEntitiesReentrant(doc, cur->content); + if (buffer != NULL) { + xmlBufferWriteCHAR(buf, buffer); + free(buffer); + } + } if (cur->childs != NULL) { xmlNodeListDump(buf, doc, cur->childs, level + 1); } diff --git a/valid.c b/valid.c index 21655374..0a62b7ee 100644 --- a/valid.c +++ b/valid.c @@ -78,8 +78,13 @@ xmlCopyElementContent(xmlElementContentPtr cur) { if (cur == NULL) return(NULL); ret = xmlNewElementContent((CHAR *) cur->name, cur->type); - if (cur->c1 != NULL) cur->c1 = xmlCopyElementContent(cur->c1); - if (cur->c2 != NULL) cur->c2 = xmlCopyElementContent(cur->c2); + if (ret == NULL) { + fprintf(stderr, "xmlCopyElementContent : out of memory\n"); + return(NULL); + } + ret->ocur = cur->ocur; + if (cur->c1 != NULL) ret->c1 = xmlCopyElementContent(cur->c1); + if (cur->c2 != NULL) ret->c2 = xmlCopyElementContent(cur->c2); return(ret); } @@ -311,7 +316,7 @@ xmlAddElementDecl(xmlDtdPtr dtd, const CHAR *name, int type, */ 
ret->type = type; ret->name = xmlStrdup(name); - ret->content = content; + ret->content = xmlCopyElementContent(content); table->nb_elements++; return(ret); diff --git a/xmlIO.h b/xmlIO.h new file mode 100644 index 00000000..d6edcae2 --- /dev/null +++ b/xmlIO.h @@ -0,0 +1,40 @@ +/* + * xmlIO.h : interface for the I/O interfaces used by the parser + * + * See Copyright for the status of this software. + * + * Daniel.Veillard@w3.org + */ + +#ifndef __XML_IO_H__ +#define __XML_IO_H__ + +#include +#include "tree.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct xmlParserInputBuffer { + /* Inputs */ + FILE *file; /* Input on file handler */ + int fd; /* Input on a file descriptor */ +/********** +#ifdef HAVE_ZLIB_H + gzFile gzfile; Input on a compressed stream +#endif + */ + + + xmlBufferPtr buffer; /* Local buffer encoded in UTF-8 */ + +} xmlParserInputBuffer; + +typedef xmlParserInputBuffer *xmlParserInputBufferPtr; + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_IO_H__ */