updated a bit, made a comment more specific, more work on the Schemas

* TODO: updated a bit
* parser.c: made a comment more specific
* xmlregexp.c xmlschemas.c xmlschemastypes.c: more work on the
  Schemas conformance.
* test/schemas result/schemas: updated the test list
Daniel
This commit is contained in:
Daniel Veillard 2002-04-22 16:01:24 +00:00
parent 441bc32e31
commit e19fc23b64
27 changed files with 292 additions and 34 deletions

View File

@ -1,3 +1,11 @@
Mon Apr 22 17:59:14 CEST 2002 Daniel Veillard <daniel@veillard.com>
* TODO: updated a bit
* parser.c: made a comment more specific
* xmlregexp.c xmlschemas.c xmlschemastypes.c: more work on the
Schemas conformance.
* test/schemas result/schemas: updated the test list
Sat Apr 20 19:36:39 CEST 2002 Daniel Veillard <daniel@veillard.com>
* xmlregexp.c xmlschemas.c include/libxml/xmlautomata.h:

25
TODO
View File

@ -27,22 +27,15 @@ TODO:
- Better checking of external parsed entities TAG 1234
- Go through the errata and do the cleanup.
http://www.w3.org/XML/xml-19980210-errata ... started ...
- reported by Michael, update of doc node when pasting on a new document
There can be far more than simply the doc pointer which refers to
the old document, for example namespace declarations or entities
references can also be a nasty problem, far more than updating the
doc values.
- jamesh suggestion: SAX like functions to save a document ie. call a
function to open a new element with given attributes, write character
data, close last element, etc
+ inverted SAX, initial patch in April 2002 archives.
- htmlParseDoc has parameter encoding which is not used.
Function htmlCreateDocParserCtxt ignores it.
- fix realloc() usage.
- compliance to XML-Namespace checking, see section 6 of
http://www.w3.org/TR/REC-xml-names/
- Fix output of <tst val="x&#xA;y"/>
- Implement OASIS XML Catalog support
http://www.oasis-open.org/committees/entity/
- Tighten the UTF8 conformance (Martin Duerst):
http://www.w3.org/2001/06/utf-8-test/.
The bad files are in http://www.w3.org/2001/06/utf-8-wrong/.
@ -51,9 +44,10 @@ TODO:
TODO:
=====
- Get OASIS testsuite to a more friendly result, check all the results
once stable. Current state at:
http://xmlsoft.org/conf/result.html
- move all string manipulation functions (xmlStrdup, xmlStrlen, etc.) to
global.c. Bjorn noted that the following files depend on parser.o solely
because of these string functions: entities.o, global.o, hash.o, tree.o,
xmlIO.o, and xpath.o.
- Optimization of tag strings allocation ?
@ -67,6 +61,8 @@ TODO:
EXTENSIONS:
===========
- Fix output of <tst val="x&#xA;y"/>
- Tools to produce man pages from the SGML docs.
- Add Xpointer recognition/API
@ -77,6 +73,7 @@ EXTENSIONS:
- Implement XSchemas
=> Really need to be done <grin/>
- started
- O2K parsing;
=> this is a somewhat ugly mix of HTML and XML, adding a specific
@ -106,6 +103,12 @@ EXTENSIONS:
Done:
=====
- Implement OASIS XML Catalog support
http://www.oasis-open.org/committees/entity/
- Get OASIS testsuite to a more friendly result, check all the results
once stable. The check-xml-test-suite.py script does this.
- Implement XSLT
=> libxslt

View File

@ -87,6 +87,7 @@ int xmlAutomataNewCounter (xmlAutomataPtr am,
int max);
xmlRegexpPtr xmlAutomataCompile (xmlAutomataPtr am);
int xmlAutomataIsDeterminist(xmlAutomataPtr am);
#ifdef __cplusplus
}

View File

@ -41,6 +41,7 @@ typedef enum {
XML_SCHEMAS_ERR_EXTRACONTENT,
XML_SCHEMAS_ERR_INVALIDATTR,
XML_SCHEMAS_ERR_INVALIDELEM,
XML_SCHEMAS_ERR_NOTDETERMINIST,
XML_SCHEMAS_ERR_CONSTRUCT,
XML_SCHEMAS_ERR_INTERNAL,
XML_SCHEMAS_ERR_NOTSIMPLE,

View File

@ -9943,8 +9943,8 @@ xmlCreateFileParserCtxt(const char *filename)
* It uses the given SAX function block to handle the parsing callbacks.
* If sax is NULL, fall back to the default DOM tree building routines.
*
* User data (void *) is stored within the parser context, so it is
* available nearly everywhere in libxml.
* User data (void *) is stored within the parser context in the
* context's _private member, so it is available nearly everywhere in libxml
*
* Returns the resulting document tree
*/

View File

@ -2,7 +2,6 @@ Type of all2 : ./test/schemas/all_0.xsd:5 :elements
Type of all2 : ./test/schemas/all_0.xsd:5 :elements
Type of anontype1 : ./test/schemas/all_0.xsd:4 :elements
Building content model for doc
Unimplemented block at xmlschemas.c:3062
Content model of doc:
regexp: '(null)'
3 atoms:

View File

@ -2,7 +2,6 @@ Type of all2 : ./test/schemas/all_0.xsd:5 :elements
Type of all2 : ./test/schemas/all_0.xsd:5 :elements
Type of anontype1 : ./test/schemas/all_0.xsd:4 :elements
Building content model for doc
Unimplemented block at xmlschemas.c:3062
Content model of doc:
regexp: '(null)'
3 atoms:

View File

@ -2,7 +2,6 @@ Type of all2 : ./test/schemas/all_0.xsd:5 :elements
Type of all2 : ./test/schemas/all_0.xsd:5 :elements
Type of anontype1 : ./test/schemas/all_0.xsd:4 :elements
Building content model for doc
Unimplemented block at xmlschemas.c:3062
Content model of doc:
regexp: '(null)'
3 atoms:

View File

@ -2,7 +2,6 @@ Type of all2 : ./test/schemas/all_0.xsd:5 :elements
Type of all2 : ./test/schemas/all_0.xsd:5 :elements
Type of anontype1 : ./test/schemas/all_0.xsd:4 :elements
Building content model for doc
Unimplemented block at xmlschemas.c:3062
Content model of doc:
regexp: '(null)'
3 atoms:

View File

@ -2,7 +2,6 @@ Type of all2 : ./test/schemas/all_0.xsd:5 :elements
Type of all2 : ./test/schemas/all_0.xsd:5 :elements
Type of anontype1 : ./test/schemas/all_0.xsd:4 :elements
Building content model for doc
Unimplemented block at xmlschemas.c:3062
Content model of doc:
regexp: '(null)'
3 atoms:

View File

@ -2,7 +2,6 @@ Type of all2 : ./test/schemas/all_0.xsd:5 :elements
Type of all2 : ./test/schemas/all_0.xsd:5 :elements
Type of anontype1 : ./test/schemas/all_0.xsd:4 :elements
Building content model for doc
Unimplemented block at xmlschemas.c:3062
Content model of doc:
regexp: '(null)'
3 atoms:

View File

@ -2,7 +2,6 @@ Type of all2 : ./test/schemas/all_1.xsd:5 :elements
Type of all2 : ./test/schemas/all_1.xsd:5 :elements
Type of anontype1 : ./test/schemas/all_1.xsd:4 :elements
Building content model for doc
Unimplemented block at xmlschemas.c:3062
Content model of doc:
regexp: '(null)'
3 atoms:

View File

@ -2,7 +2,6 @@ Type of all2 : ./test/schemas/all_1.xsd:5 :elements
Type of all2 : ./test/schemas/all_1.xsd:5 :elements
Type of anontype1 : ./test/schemas/all_1.xsd:4 :elements
Building content model for doc
Unimplemented block at xmlschemas.c:3062
Content model of doc:
regexp: '(null)'
3 atoms:

View File

@ -2,7 +2,6 @@ Type of all2 : ./test/schemas/all_1.xsd:5 :elements
Type of all2 : ./test/schemas/all_1.xsd:5 :elements
Type of anontype1 : ./test/schemas/all_1.xsd:4 :elements
Building content model for doc
Unimplemented block at xmlschemas.c:3062
Content model of doc:
regexp: '(null)'
3 atoms:

View File

@ -2,7 +2,6 @@ Type of all2 : ./test/schemas/all_1.xsd:5 :elements
Type of all2 : ./test/schemas/all_1.xsd:5 :elements
Type of anontype1 : ./test/schemas/all_1.xsd:4 :elements
Building content model for doc
Unimplemented block at xmlschemas.c:3062
Content model of doc:
regexp: '(null)'
3 atoms:

View File

@ -2,7 +2,6 @@ Type of all2 : ./test/schemas/all_1.xsd:5 :elements
Type of all2 : ./test/schemas/all_1.xsd:5 :elements
Type of anontype1 : ./test/schemas/all_1.xsd:4 :elements
Building content model for doc
Unimplemented block at xmlschemas.c:3062
Content model of doc:
regexp: '(null)'
3 atoms:

View File

@ -2,7 +2,6 @@ Type of all2 : ./test/schemas/all_1.xsd:5 :elements
Type of all2 : ./test/schemas/all_1.xsd:5 :elements
Type of anontype1 : ./test/schemas/all_1.xsd:4 :elements
Building content model for doc
Unimplemented block at xmlschemas.c:3062
Content model of doc:
regexp: '(null)'
3 atoms:

1
result/schemas/empty_0_0 Normal file
View File

@ -0,0 +1 @@
./test/schemas/empty_0.xml validates

View File

@ -0,0 +1,13 @@
Type of restriction 3 : ./test/schemas/empty_0.xsd:6 :empty
Type of complexContent 2 : ./test/schemas/empty_0.xsd:5 :empty
Type of complexContent 2 : ./test/schemas/empty_0.xsd:5 :empty
Type of anontype1 : ./test/schemas/empty_0.xsd:4 :empty
Type of restriction 3 : ./test/schemas/empty_0.xsd:6 :empty
Building content model for internationalPrice
Content model of internationalPrice:
regexp: '(null)'
0 atoms:
1 states:
state: FINAL 0, 0 transitions:
0 counters:
Element internationalPrice content check succeeded

1
result/schemas/empty_1_0 Normal file
View File

@ -0,0 +1 @@
./test/schemas/empty_0.xml validates

View File

@ -0,0 +1,9 @@
Type of anontype1 : ./test/schemas/empty_1.xsd:4 :empty
Building content model for internationalPrice
Content model of internationalPrice:
regexp: '(null)'
0 atoms:
1 states:
state: FINAL 0, 0 transitions:
0 counters:
Element internationalPrice content check succeeded

1
test/schemas/empty_0.xml Normal file
View File

@ -0,0 +1 @@
<internationalPrice currency="EUR" value="423.46"/>

13
test/schemas/empty_0.xsd Normal file
View File

@ -0,0 +1,13 @@
<?xml version="1.0"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<xsd:element name="internationalPrice">
<xsd:complexType>
<xsd:complexContent>
<xsd:restriction base="xsd:anyType">
<xsd:attribute name="currency" type="xsd:string"/>
<xsd:attribute name="value" type="xsd:decimal"/>
</xsd:restriction>
</xsd:complexContent>
</xsd:complexType>
</xsd:element>
</xsd:schema>

9
test/schemas/empty_1.xsd Normal file
View File

@ -0,0 +1,9 @@
<?xml version="1.0"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<xsd:element name="internationalPrice">
<xsd:complexType>
<xsd:attribute name="currency" type="xsd:string"/>
<xsd:attribute name="value" type="xsd:decimal"/>
</xsd:complexType>
</xsd:element>
</xsd:schema>

View File

@ -40,6 +40,16 @@
#define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l)
#define NEXTL(l) ctxt->cur += l;
/**
 * TODO:
 *
 * Macro to flag unimplemented blocks: it reports the source file and
 * line of the expansion site through xmlGenericError().
 *
 * The expansion already ends with a semicolon, so it is a complete
 * statement on its own; writing "TODO;" just adds an empty statement.
 */
#define TODO \
xmlGenericError(xmlGenericErrorContext, \
"Unimplemented block at %s:%d\n", \
__FILE__, __LINE__);
/************************************************************************
* *
@ -216,6 +226,8 @@ struct _xmlAutomata {
int maxCounters;
int nbCounters;
xmlRegCounter *counters;
int determinist;
};
struct _xmlRegexp {
@ -226,6 +238,7 @@ struct _xmlRegexp {
xmlRegAtomPtr *atoms;
int nbCounters;
xmlRegCounter *counters;
int determinist;
};
typedef struct _xmlRegExecRollback xmlRegExecRollback;
@ -322,6 +335,7 @@ xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
ctxt->nbCounters = 0;
ret->counters = ctxt->counters;
ctxt->counters = NULL;
ret->determinist = ctxt->determinist;
return(ret);
}
@ -346,6 +360,7 @@ xmlRegNewParserCtxt(const xmlChar *string) {
ret->cur = ret->string;
ret->neg = 0;
ret->error = 0;
ret->determinist = -1;
return(ret);
}
@ -1284,6 +1299,151 @@ xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
}
}
/**
 * xmlFACompareAtoms:
 * @atom1:  an atom, may be NULL
 * @atom2:  an atom, may be NULL
 *
 * Checks whether two atoms are equivalent. The very same pointer
 * (including two NULLs) is always equivalent; a NULL against a non-NULL
 * atom, or two atoms of different types, never are. String atoms are
 * compared with xmlStrEqual() and character atoms by code point.
 * Range atoms are not handled yet: they are flagged through TODO and
 * reported as different. Any other pair of same-typed atoms is reported
 * as equivalent.
 *
 * Returns 1 if yes and 0 otherwise
 */
static int
xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
    int same = 1;

    if (atom1 == atom2)
        return(1);
    /* the type test is only reached once both pointers are known valid */
    if ((atom1 == NULL) || (atom2 == NULL) || (atom1->type != atom2->type))
        return(0);

    if (atom1->type == XML_REGEXP_STRING) {
        same = xmlStrEqual((xmlChar *)atom1->valuep,
                           (xmlChar *)atom2->valuep);
    } else if (atom1->type == XML_REGEXP_CHARVAL) {
        same = (atom1->codepoint == atom2->codepoint);
    } else if (atom1->type == XML_REGEXP_RANGES) {
        /* comparison of ranges is not implemented */
        TODO;
        same = 0;
    }
    /* epsilon atoms and all remaining types keep the default answer */
    return(same);
}
/**
 * xmlFARecurseDeterminism:
 * @ctxt:  a regexp parser context
 * @state:  the state whose outgoing transitions are examined, may be NULL
 * @to:  the index of the target state being looked for
 * @atom:  the atom being looked for
 *
 * Check whether the associated regexp is determinist,
 * should be called after xmlFAEliminateEpsilonTransitions()
 *
 * Walks the transitions leaving @state. A transition carrying an atom
 * conflicts when it reaches @to with an atom equivalent to @atom.
 * A transition without atom is followed recursively, unless it was
 * eliminated (target -1).
 *
 * NOTE(review): nothing here marks visited states, so a cycle made only
 * of atom-less transitions would recurse without bound -- confirm that
 * the callers cannot build one.
 *
 * Returns 0 as soon as a conflict is found, 1 otherwise
 */
static int
xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
                        int to, xmlRegAtomPtr atom) {
    int idx;

    if (state == NULL)
        return(1);

    for (idx = 0; idx < state->nbTrans; idx++) {
        xmlRegTransPtr trans = &(state->trans[idx]);

        if (trans->atom != NULL) {
            /*
             * check transitions conflicting with the one looked at
             */
            if ((trans->to == to) && xmlFACompareAtoms(trans->atom, atom))
                return(0);
        } else if (trans->to != -1) {
            /* follow the remaining atom-less transition */
            if (xmlFARecurseDeterminism(ctxt, ctxt->states[trans->to],
                                        to, atom) == 0)
                return(0);
        }
    }
    return(1);
}
/**
 * xmlFAComputesDeterminism:
 * @ctxt:  a regexp parser context
 *
 * Check whether the associated regexp is determinist,
 * should be called after xmlFAEliminateEpsilonTransitions()
 *
 * The answer is computed once and cached in @ctxt->determinist, where
 * -1 means "not computed yet"; every exit path of the computation
 * stores its result there. As a side effect, a transition which
 * duplicates a later transition of the same state (equivalent atom,
 * same target) is eliminated by setting its target to -1.
 *
 * Returns 1 if the automata is determinist, 0 otherwise
 */
static int
xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
    int statenr, transnr;
    xmlRegStatePtr state;
    xmlRegTransPtr t1, t2;
    int i;
    int ret = 1;

    if (ctxt->determinist != -1)
        return(ctxt->determinist);

    /*
     * Check for all states that there aren't 2 transitions
     * with the same atom and a different target.
     * Both outer loops stop as soon as a conflict has been recorded.
     */
    for (statenr = 0;(ret != 0) && (statenr < ctxt->nbStates);statenr++) {
        state = ctxt->states[statenr];
        if (state == NULL)
            continue;
        for (transnr = 0;(ret != 0) && (transnr < state->nbTrans);transnr++) {
            t1 = &(state->trans[transnr]);
            /*
             * Determinism checks in case of counted or all transitions
             * will have to be handled separately
             */
            if (t1->atom == NULL)
                continue;
            if (t1->to == -1) /* eliminated */
                continue;
            for (i = 0;i < transnr;i++) {
                t2 = &(state->trans[i]);
                if (t2->to == -1) /* eliminated */
                    continue;
                if (t2->atom != NULL) {
                    if (!xmlFACompareAtoms(t1->atom, t2->atom))
                        continue;
                    if (t1->to == t2->to)
                        t2->to = -1; /* eliminate */
                    else
                        ret = 0; /* not determinist ! */
                } else if (xmlFARecurseDeterminism(ctxt,
                                                   ctxt->states[t1->to],
                                                   t2->to, t2->atom) == 0) {
                    /*
                     * do the closure in case of remaining specific
                     * epsilon transitions like choices or all
                     * (t1->to cannot be -1 here, it was tested above).
                     * Only a failed closure changes the verdict: a
                     * successful one must not erase a conflict already
                     * recorded for an earlier transition.
                     */
                    ret = 0;
                    break;
                }
            }
        }
    }
    /* always cache the verdict, including on the failed-closure path */
    ctxt->determinist = ret;
    return(ret);
}
/************************************************************************
* *
* Routines to check input against transition atoms *
@ -3747,9 +3907,29 @@ xmlAutomataCompile(xmlAutomataPtr am) {
xmlRegexpPtr ret;
xmlFAEliminateEpsilonTransitions(am);
xmlFAComputesDeterminism(am);
ret = xmlRegEpxFromParse(am);
return(ret);
}
/**
 * xmlAutomataIsDeterminist:
 * @am:  an automata
 *
 * Checks if an automata is determinist, delegating the actual work
 * to xmlFAComputesDeterminism().
 *
 * Returns 1 if true, 0 if not, and -1 in case of error
 */
int
xmlAutomataIsDeterminist(xmlAutomataPtr am) {
    /* a missing automata is the only error reported here */
    if (am == NULL)
        return(-1);

    return(xmlFAComputesDeterminism(am));
}
#endif /* LIBXML_AUTOMATA_ENABLED */
#endif /* LIBXML_REGEXP_ENABLED */

View File

@ -55,6 +55,7 @@ struct _xmlSchemaParserCtxt {
void *userData; /* user specific data block */
xmlSchemaValidityErrorFunc error; /* the callback in case of errors */
xmlSchemaValidityWarningFunc warning;/* the callback in case of warning */
xmlSchemaValidError err;
xmlSchemaPtr schema; /* The schema in use */
xmlChar *container; /* the current element, group, ... */
@ -3059,7 +3060,6 @@ xmlSchemaBuildAContentModel(xmlSchemaTypePtr type,
lax = type->minOccurs == 0;
ctxt->state = xmlAutomataNewAllTrans(ctxt->am, ctxt->state, NULL,
lax);
TODO
break;
}
case XML_SCHEMA_TYPE_RESTRICTION:
@ -3128,12 +3128,19 @@ xmlSchemaBuildContentModel(xmlSchemaElementPtr elem,
start = ctxt->state = xmlAutomataGetInitState(ctxt->am);
xmlSchemaBuildAContentModel(elem->subtypes, ctxt, name);
xmlAutomataSetFinalState(ctxt->am, ctxt->state);
elem->contModel = xmlAutomataCompile(ctxt->am);
if (!xmlAutomataIsDeterminist(ctxt->am)) {
xmlGenericError(xmlGenericErrorContext,
"Content model of %s is not determinist:\n", name);
elem->contModel = xmlAutomataCompile(ctxt->am);
ctxt->err = XML_SCHEMAS_ERR_NOTDETERMINIST;
} else {
elem->contModel = xmlAutomataCompile(ctxt->am);
#ifdef DEBUG_CONTENT
xmlGenericError(xmlGenericErrorContext,
"Content model of %s:\n", name);
xmlRegexpPrint(stderr, elem->contModel);
xmlGenericError(xmlGenericErrorContext,
"Content model of %s:\n", name);
xmlRegexpPrint(stderr, elem->contModel);
#endif
}
ctxt->state = NULL;
xmlFreeAutomata(ctxt->am);
ctxt->am = NULL;
@ -4461,8 +4468,6 @@ xmlSchemaValidateComplexType(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr node) {
child = ctxt->node;
type = ctxt->type;
/* 3.4.4 1 was verified on the caller */
switch (type->contentType) {
case XML_SCHEMA_CONTENT_EMPTY:
if (child != NULL) {
@ -4471,6 +4476,15 @@ xmlSchemaValidateComplexType(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr node) {
"Element %s is supposed to be empty\n",
node->name);
}
if (type->attributes != NULL) {
xmlSchemaValidateAttributes(ctxt, node, type->attributes);
}
subtype = type->subtypes;
while (subtype != NULL) {
ctxt->type = subtype;
xmlSchemaValidateComplexType(ctxt, node);
subtype = subtype->next;
}
break;
case XML_SCHEMA_CONTENT_ELEMENTS:
case XML_SCHEMA_CONTENT_MIXED:
@ -4479,7 +4493,6 @@ xmlSchemaValidateComplexType(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr node) {
* Skip ignorable nodes in that context
*/
child = xmlSchemaSkipIgnored(ctxt, type, child);
subtype = type->subtypes;
while (child != NULL) {
if (child->type == XML_ELEMENT_NODE) {
ret = xmlRegExecPushString(ctxt->regexp,
@ -4543,6 +4556,8 @@ xmlSchemaValidateContent(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr node) {
child = ctxt->node;
type = ctxt->type;
xmlSchemaValidateAttributes(ctxt, node, type->attributes);
switch (type->type) {
case XML_SCHEMA_TYPE_ANY:
/* Any type will do it, fine */

View File

@ -425,7 +425,7 @@ xmlSchemaCompareDecimals(xmlSchemaValPtr x, xmlSchemaValPtr y)
*
* Returns -1 if x < y, 0 if x == y, 1 if x > y and -2 in case of error
*/
int
static int
xmlSchemaCompareValues(xmlSchemaValPtr x, xmlSchemaValPtr y) {
if ((x == NULL) || (y == NULL))
return(-2);
@ -484,6 +484,21 @@ xmlSchemaValidateFacet(xmlSchemaTypePtr base, xmlSchemaFacetPtr facet,
case XML_SCHEMA_FACET_WHITESPACE:
TODO /* whitespaces */
return(0);
case XML_SCHEMA_FACET_MAXLENGTH:
if ((facet->val != NULL) &&
(facet->val->type == XML_SCHEMAS_DECIMAL) &&
(facet->val->value.decimal.frac == 0)) {
int len;
if (facet->val->value.decimal.sign == 1)
return(1);
len = xmlUTF8Strlen(value);
if (len > facet->val->value.decimal.base)
return(1);
return(0);
}
TODO /* error code */
return(1);
default:
TODO
}