mirror of
https://github.com/GNOME/libxml2.git
synced 2025-01-30 17:30:01 +08:00
uri.c uri.h: finished the escaping handling, the base support and the URI
path normalization. Makefile.am: added instructions to generate testURI TODO: updated doc/xml.html, doc/smallfootonly.gif doc/w3c.png: updated, added links and icons for W3C and Gnome Daniel
This commit is contained in:
parent
8f62198686
commit
ec30341c5c
10
ChangeLog
10
ChangeLog
@ -1,3 +1,13 @@
|
||||
Fri Mar 24 14:35:21 CET 2000 Daniel Veillard <Daniel.Veillard@w3.org>
|
||||
|
||||
* uri.c uri.h: finished the escaping handling, the base support
|
||||
and the URI path normalization. Looks good just lacks the
|
||||
authority content parsing code.
|
||||
* Makefile.am: added instructions to generate testURI
|
||||
* TODO: updated
|
||||
* doc/xml.html, doc/smallfootonly.gif doc/w3c.png: updated,
|
||||
added links and icons for W3C and Gnome
|
||||
|
||||
Mon Mar 20 14:05:26 CET 2000 Daniel Veillard <Daniel.Veillard@w3.org>
|
||||
|
||||
* xmlmemory.[ch] : seems I forgot to actually update the files in
|
||||
|
@ -6,7 +6,7 @@ INCLUDES = -I@srcdir@ @Z_CFLAGS@ @CORBA_CFLAGS@ $(VERSION_FLAGS)
|
||||
|
||||
VERSION_FLAGS = -DLIBXML_VERSION=\"@LIBXML_VERSION@\"
|
||||
|
||||
noinst_PROGRAMS=tester testSAX testHTML testXPath
|
||||
noinst_PROGRAMS=tester testSAX testHTML testXPath testURI
|
||||
|
||||
bin_SCRIPTS=xml-config
|
||||
|
||||
@ -84,6 +84,9 @@ testall : tests SVGtests SAXtests XPathtests XMLenttests
|
||||
|
||||
tests: XMLtests HTMLtests Validtests
|
||||
|
||||
testURI: $(srcdir)/uri.c $(srcdir)/uri.h xmlmemory.o
|
||||
$(CC) $(CFLAGS) -DSTANDALONE -o testURI $(srcdir)/uri.c xmlmemory.o
|
||||
|
||||
HTMLtests : testHTML
|
||||
@echo "##"
|
||||
@echo "## HTML regression tests"
|
||||
|
4
TODO
4
TODO
@ -5,8 +5,6 @@
|
||||
TODO:
|
||||
=====
|
||||
|
||||
- extend validity checks to go through entities content instead of
|
||||
just labelling them PCDATA
|
||||
- add support for the trick from Henry conf/sun/valid/empty.xml
|
||||
- Correct standalone checking/emitting (hard)
|
||||
2.9 Standalone Document Declaration
|
||||
@ -90,6 +88,8 @@ EXTENSIONS:
|
||||
Done:
|
||||
=====
|
||||
|
||||
- extend validity checks to go through entities content instead of
|
||||
just labelling them PCDATA
|
||||
- Save Dtds using the children list instead of dumping the tables,
|
||||
order is preserved as well as comments and PIs
|
||||
- Wrote a notice of changes requires to go from 1.x to 2.x
|
||||
|
BIN
doc/smallfootonly.gif
Normal file
BIN
doc/smallfootonly.gif
Normal file
Binary file not shown.
After Width: | Height: | Size: 2.7 KiB |
BIN
doc/w3c.png
Normal file
BIN
doc/w3c.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 2.0 KiB |
62
doc/xml.html
62
doc/xml.html
@ -8,6 +8,10 @@
|
||||
</head>
|
||||
|
||||
<body bgcolor="#ffffff">
|
||||
<p><a href="http://www.gnome.org/"><img src="smallfootonly.gif" alt="Gnome
|
||||
Logo"></a><a href="http://www.w3.org/Status"><img src="w3c.png" alt="W3C
|
||||
Logo"></a></p>
|
||||
|
||||
<h1 align="center">The XML library for Gnome</h1>
|
||||
|
||||
<h2 style="text-align: center">libxml, a.k.a. gnome-xml</h2>
|
||||
@ -58,8 +62,10 @@ building tag-based structured documents/data.</p>
|
||||
href="http://www.w3.org/TR/xpath">XPath</a> implementation.</li>
|
||||
<li>Libxml exports Push and Pull type parser interfaces for both XML and
|
||||
HTML.</li>
|
||||
<li>This library is released both under the W3C Copyright and the GNU LGPL.
|
||||
Basically, everybody should be happy; if not, drop me a mail.</li>
|
||||
<li>This library is released both under the <a
|
||||
href="http://www.w3.org/Consortium/Legal/copyright-software-19980720.html">W3C
|
||||
IPR</a> and the GNU LGPL. Use either at your convenience, basically this
|
||||
should make everybody happy, if not, drop me a mail.</li>
|
||||
<li>There is <a href="upgrade.html">a first set of instruction</a>
|
||||
concerning upgrade from libxml-1.x to libxml-2.x</li>
|
||||
</ul>
|
||||
@ -87,6 +93,11 @@ building tag-based structured documents/data.</p>
|
||||
href="http://www.levien.com/svg/">gill</a>. Check his <a
|
||||
href="http://www.levien.com/gnome/domination.html">DOMination
|
||||
paper</a>.</li>
|
||||
<li>Check <a href="http://cvs.gnome.org/lxr/source/gnome-xml/TODO">the TODO
|
||||
file</a></li>
|
||||
<li>Read the <a href="upgrade.html">1.x to 2.x upgrade path</a>. If you are
|
||||
starting a new project using libxml you should really use the 2.x
|
||||
version.</li>
|
||||
<li>And don't forget to look at the <a href="/messages/">mailing-list
|
||||
archive</a>, too.</li>
|
||||
</ol>
|
||||
@ -111,6 +122,16 @@ href="mailto:majordomo@rufus.w3.org">majordomo@rufus.w3.org</a> with
|
||||
<p>Alternatively, you can just send the bug to the <a
|
||||
href="mailto:xml@rufus.w3.org">xml@rufus.w3.org</a> list.</p>
|
||||
|
||||
<p>Of course, bug reports with a suggested patch for fixing them will
|
||||
probably be processed faster.</p>
|
||||
|
||||
<p>If you're looking for help, a quick look at <a
|
||||
href="http://xmlsoft.org/messages/#407">the list archive</a> may actually
|
||||
provide the answer, I usually send source samples when answering libxml usage
|
||||
questions. The <a href="http://xmlsoft.org/book1.html">auto-generated
|
||||
documentation</a> is not as polished as I would like (I need to learn more
|
||||
about Docbook), but it's a good starting point.</p>
|
||||
|
||||
<h2><a name="Downloads">Downloads</a></h2>
|
||||
|
||||
<p>The latest versions of libxml can be found on <a
|
||||
@ -771,33 +792,42 @@ finding them in the input).</p>
|
||||
|
||||
<h2><a name="Namespaces">Namespaces</a></h2>
|
||||
|
||||
<p>The libxml library implements namespace @@ support by recognizing namespace
|
||||
contructs in the input, and does namespace lookup automatically when building
|
||||
the DOM tree. A namespace declaration is associated with an in-memory
|
||||
structure and all elements or attributes within that namespace point to it.
|
||||
Hence testing the namespace is a simple and fast equality operation at the
|
||||
user level.</p>
|
||||
<p>The libxml library implements <a
|
||||
href="http://www.w3.org/TR/REC-xml-names/">XML namespaces</a> support by
|
||||
recognizing namespace constructs in the input, and does namespace lookup
|
||||
automatically when building the DOM tree. A namespace declaration is
|
||||
associated with an in-memory structure and all elements or attributes within
|
||||
that namespace point to it. Hence testing the namespace is a simple and fast
|
||||
equality operation at the user level.</p>
|
||||
|
||||
<p>I suggest that people using libxml use a namespace, and declare it in the
|
||||
root element of their document as the default namespace. Then they don't need
|
||||
to use the prefix in the content but we will have a basis for future semantic
|
||||
refinement and merging of data from different sources. This doesn't augment
|
||||
significantly the size of the XML output, but significantly increase its value
|
||||
in the long-term.</p>
|
||||
in the long-term. Example:</p>
|
||||
<pre><mydoc xmlns="http://mydoc.example.org/schemas/">
|
||||
<elem1>...</elem1>
|
||||
<elem2>...</elem2>
|
||||
</mydoc></pre>
|
||||
|
||||
<p>Concerning the namespace value, this has to be an URL, but the URL doesn't
|
||||
have to point to any existing resource on the Web. I suggest that it makes
|
||||
sense to use an URL within a domain you control, and that the URL should
|
||||
contain some kind of version information if possible. For example,
|
||||
<code>"http://www.gnome.org/gnumeric/1.0"</code> is a good namespace scheme.
|
||||
Then when you load a file, make sure that a namespace carrying the
|
||||
have to point to any existing resource on the Web. It will bind all the
|
||||
elements and attributes with that URL. I suggest to use an URL within a domain
|
||||
you control, and that the URL should contain some kind of version information
|
||||
if possible. For example, <code>"http://www.gnome.org/gnumeric/1.0/"</code> is
|
||||
a good namespace scheme. </p>
|
||||
|
||||
<p>Then when you load a file, make sure that a namespace carrying the
|
||||
version-independent prefix is installed on the root element of your document,
|
||||
and if the version information don't match something you know, warn the user
|
||||
and be liberal in what you accept as the input. Also do *not* try to base
|
||||
namespace checking on the prefix value. <foo:text> may be exactly the same
|
||||
as <bar:text> in another document. What really matter is the URI associated
|
||||
with the element or the attribute, not the prefix string (which is just a
|
||||
shortcut for the full URI).</p>
|
||||
shortcut for the full URI). In libxml element and attributes have a
|
||||
<code>ns</code> field pointing to an xmlNs structure detailing the namespace
|
||||
prefix and its URI.</p>
|
||||
|
||||
<p>@@Interfaces@@</p>
|
||||
|
||||
@ -1082,6 +1112,6 @@ base under gnome-xml/example</p>
|
||||
|
||||
<p><a href="mailto:Daniel.Veillard@w3.org">Daniel Veillard</a></p>
|
||||
|
||||
<p>$Id: xml.html,v 1.29 2000/03/14 19:59:03 veillard Exp $</p>
|
||||
<p>$Id: xml.html,v 1.30 2000/03/20 13:07:14 veillard Exp $</p>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -38,9 +38,11 @@ struct _xmlURI {
|
||||
* xmlChar * xmlNodeGetBase (xmlDocPtr doc,
|
||||
* xmlNodePtr cur);
|
||||
*/
|
||||
xmlChar * xmlBuildURI (const xmlChar *URI,
|
||||
const xmlChar *base);
|
||||
xmlURIPtr xmlParseURI (const char *URI);
|
||||
xmlChar * xmlBuildURI (const xmlChar *URI,
|
||||
const xmlChar *base);
|
||||
xmlURIPtr xmlParseURI (const char *URI);
|
||||
xmlChar * xmlSaveUri (xmlURIPtr uri);
|
||||
int xmlNormalizeURIPath (char *path);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
660
uri.c
660
uri.c
@ -204,6 +204,299 @@ xmlCreateURI(void) {
|
||||
return(ret);
|
||||
}
|
||||
|
||||
/**
|
||||
* xmlSaveUri:
|
||||
* @uri: pointer to an xmlURI
|
||||
*
|
||||
* Save the URI as an escaped string
|
||||
*
|
||||
* Returns a new string (to be deallocated by caller)
|
||||
*/
|
||||
xmlChar *
|
||||
xmlSaveUri(xmlURIPtr uri) {
|
||||
xmlChar *ret = NULL;
|
||||
const char *p;
|
||||
int len;
|
||||
int max;
|
||||
|
||||
if (uri == NULL) return(NULL);
|
||||
|
||||
|
||||
max = 80;
|
||||
ret = xmlMalloc((max + 1) * sizeof(xmlChar));
|
||||
if (ret == NULL) {
|
||||
fprintf(stderr, "xmlSaveUri: out of memory\n");
|
||||
return(NULL);
|
||||
}
|
||||
len = 0;
|
||||
|
||||
if (uri->scheme != NULL) {
|
||||
p = uri->scheme;
|
||||
while (*p != 0) {
|
||||
if (len >= max) {
|
||||
max *= 2;
|
||||
ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
|
||||
if (ret == NULL) {
|
||||
fprintf(stderr, "xmlSaveUri: out of memory\n");
|
||||
return(NULL);
|
||||
}
|
||||
}
|
||||
ret[len++] = *p++;
|
||||
}
|
||||
if (len >= max) {
|
||||
max *= 2;
|
||||
ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
|
||||
if (ret == NULL) {
|
||||
fprintf(stderr, "xmlSaveUri: out of memory\n");
|
||||
return(NULL);
|
||||
}
|
||||
}
|
||||
ret[len++] = ':';
|
||||
}
|
||||
if (uri->opaque != NULL) {
|
||||
p = uri->opaque;
|
||||
while (*p != 0) {
|
||||
if (len + 3 >= max) {
|
||||
max *= 2;
|
||||
ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
|
||||
if (ret == NULL) {
|
||||
fprintf(stderr, "xmlSaveUri: out of memory\n");
|
||||
return(NULL);
|
||||
}
|
||||
}
|
||||
if ((IS_UNRESERVED(*(p))) ||
|
||||
((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||
|
||||
((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||
|
||||
((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))
|
||||
ret[len++] = *p++;
|
||||
else {
|
||||
int val = *p++;
|
||||
ret[len++] = '%';
|
||||
switch (val / 0x10) {
|
||||
case 0xF: ret[len++] = 'F'; break;
|
||||
case 0xE: ret[len++] = 'E'; break;
|
||||
case 0xD: ret[len++] = 'D'; break;
|
||||
case 0xC: ret[len++] = 'C'; break;
|
||||
case 0xB: ret[len++] = 'B'; break;
|
||||
case 0xA: ret[len++] = 'A'; break;
|
||||
default: ret[len++] = '0' + (val / 0x10);
|
||||
}
|
||||
switch (val % 0x10) {
|
||||
case 0xF: ret[len++] = 'F'; break;
|
||||
case 0xE: ret[len++] = 'E'; break;
|
||||
case 0xD: ret[len++] = 'D'; break;
|
||||
case 0xC: ret[len++] = 'C'; break;
|
||||
case 0xB: ret[len++] = 'B'; break;
|
||||
case 0xA: ret[len++] = 'A'; break;
|
||||
default: ret[len++] = '0' + (val % 0x10);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (len >= max) {
|
||||
max *= 2;
|
||||
ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
|
||||
if (ret == NULL) {
|
||||
fprintf(stderr, "xmlSaveUri: out of memory\n");
|
||||
return(NULL);
|
||||
}
|
||||
}
|
||||
ret[len++] = 0;
|
||||
} else {
|
||||
if (uri->authority != NULL) {
|
||||
if (len + 3 >= max) {
|
||||
max *= 2;
|
||||
ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
|
||||
if (ret == NULL) {
|
||||
fprintf(stderr, "xmlSaveUri: out of memory\n");
|
||||
return(NULL);
|
||||
}
|
||||
}
|
||||
ret[len++] = '/';
|
||||
ret[len++] = '/';
|
||||
p = uri->authority;
|
||||
while (*p != 0) {
|
||||
if (len + 3 >= max) {
|
||||
max *= 2;
|
||||
ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
|
||||
if (ret == NULL) {
|
||||
fprintf(stderr, "xmlSaveUri: out of memory\n");
|
||||
return(NULL);
|
||||
}
|
||||
}
|
||||
if ((IS_UNRESERVED(*(p))) ||
|
||||
((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
|
||||
((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
|
||||
((*(p) == '=')) || ((*(p) == '+')))
|
||||
ret[len++] = *p++;
|
||||
else {
|
||||
int val = *p++;
|
||||
ret[len++] = '%';
|
||||
switch (val / 0x10) {
|
||||
case 0xF: ret[len++] = 'F'; break;
|
||||
case 0xE: ret[len++] = 'E'; break;
|
||||
case 0xD: ret[len++] = 'D'; break;
|
||||
case 0xC: ret[len++] = 'C'; break;
|
||||
case 0xB: ret[len++] = 'B'; break;
|
||||
case 0xA: ret[len++] = 'A'; break;
|
||||
default: ret[len++] = '0' + (val / 0x10);
|
||||
}
|
||||
switch (val % 0x10) {
|
||||
case 0xF: ret[len++] = 'F'; break;
|
||||
case 0xE: ret[len++] = 'E'; break;
|
||||
case 0xD: ret[len++] = 'D'; break;
|
||||
case 0xC: ret[len++] = 'C'; break;
|
||||
case 0xB: ret[len++] = 'B'; break;
|
||||
case 0xA: ret[len++] = 'A'; break;
|
||||
default: ret[len++] = '0' + (val % 0x10);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (uri->path != NULL) {
|
||||
p = uri->path;
|
||||
while (*p != 0) {
|
||||
if (len + 3 >= max) {
|
||||
max *= 2;
|
||||
ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
|
||||
if (ret == NULL) {
|
||||
fprintf(stderr, "xmlSaveUri: out of memory\n");
|
||||
return(NULL);
|
||||
}
|
||||
}
|
||||
if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
|
||||
((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
|
||||
((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
|
||||
((*(p) == ',')))
|
||||
ret[len++] = *p++;
|
||||
else {
|
||||
int val = *p++;
|
||||
ret[len++] = '%';
|
||||
switch (val / 0x10) {
|
||||
case 0xF: ret[len++] = 'F'; break;
|
||||
case 0xE: ret[len++] = 'E'; break;
|
||||
case 0xD: ret[len++] = 'D'; break;
|
||||
case 0xC: ret[len++] = 'C'; break;
|
||||
case 0xB: ret[len++] = 'B'; break;
|
||||
case 0xA: ret[len++] = 'A'; break;
|
||||
default: ret[len++] = '0' + (val / 0x10);
|
||||
}
|
||||
switch (val % 0x10) {
|
||||
case 0xF: ret[len++] = 'F'; break;
|
||||
case 0xE: ret[len++] = 'E'; break;
|
||||
case 0xD: ret[len++] = 'D'; break;
|
||||
case 0xC: ret[len++] = 'C'; break;
|
||||
case 0xB: ret[len++] = 'B'; break;
|
||||
case 0xA: ret[len++] = 'A'; break;
|
||||
default: ret[len++] = '0' + (val % 0x10);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (uri->query != NULL) {
|
||||
if (len + 3 >= max) {
|
||||
max *= 2;
|
||||
ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
|
||||
if (ret == NULL) {
|
||||
fprintf(stderr, "xmlSaveUri: out of memory\n");
|
||||
return(NULL);
|
||||
}
|
||||
}
|
||||
ret[len++] = '?';
|
||||
p = uri->query;
|
||||
while (*p != 0) {
|
||||
if (len + 3 >= max) {
|
||||
max *= 2;
|
||||
ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
|
||||
if (ret == NULL) {
|
||||
fprintf(stderr, "xmlSaveUri: out of memory\n");
|
||||
return(NULL);
|
||||
}
|
||||
}
|
||||
if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
|
||||
ret[len++] = *p++;
|
||||
else {
|
||||
int val = *p++;
|
||||
ret[len++] = '%';
|
||||
switch (val / 0x10) {
|
||||
case 0xF: ret[len++] = 'F'; break;
|
||||
case 0xE: ret[len++] = 'E'; break;
|
||||
case 0xD: ret[len++] = 'D'; break;
|
||||
case 0xC: ret[len++] = 'C'; break;
|
||||
case 0xB: ret[len++] = 'B'; break;
|
||||
case 0xA: ret[len++] = 'A'; break;
|
||||
default: ret[len++] = '0' + (val / 0x10);
|
||||
}
|
||||
switch (val % 0x10) {
|
||||
case 0xF: ret[len++] = 'F'; break;
|
||||
case 0xE: ret[len++] = 'E'; break;
|
||||
case 0xD: ret[len++] = 'D'; break;
|
||||
case 0xC: ret[len++] = 'C'; break;
|
||||
case 0xB: ret[len++] = 'B'; break;
|
||||
case 0xA: ret[len++] = 'A'; break;
|
||||
default: ret[len++] = '0' + (val % 0x10);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (uri->fragment != NULL) {
|
||||
if (len + 3 >= max) {
|
||||
max *= 2;
|
||||
ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
|
||||
if (ret == NULL) {
|
||||
fprintf(stderr, "xmlSaveUri: out of memory\n");
|
||||
return(NULL);
|
||||
}
|
||||
}
|
||||
ret[len++] = '#';
|
||||
p = uri->fragment;
|
||||
while (*p != 0) {
|
||||
if (len + 3 >= max) {
|
||||
max *= 2;
|
||||
ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
|
||||
if (ret == NULL) {
|
||||
fprintf(stderr, "xmlSaveUri: out of memory\n");
|
||||
return(NULL);
|
||||
}
|
||||
}
|
||||
if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
|
||||
ret[len++] = *p++;
|
||||
else {
|
||||
int val = *p++;
|
||||
ret[len++] = '%';
|
||||
switch (val / 0x10) {
|
||||
case 0xF: ret[len++] = 'F'; break;
|
||||
case 0xE: ret[len++] = 'E'; break;
|
||||
case 0xD: ret[len++] = 'D'; break;
|
||||
case 0xC: ret[len++] = 'C'; break;
|
||||
case 0xB: ret[len++] = 'B'; break;
|
||||
case 0xA: ret[len++] = 'A'; break;
|
||||
default: ret[len++] = '0' + (val / 0x10);
|
||||
}
|
||||
switch (val % 0x10) {
|
||||
case 0xF: ret[len++] = 'F'; break;
|
||||
case 0xE: ret[len++] = 'E'; break;
|
||||
case 0xD: ret[len++] = 'D'; break;
|
||||
case 0xC: ret[len++] = 'C'; break;
|
||||
case 0xB: ret[len++] = 'B'; break;
|
||||
case 0xA: ret[len++] = 'A'; break;
|
||||
default: ret[len++] = '0' + (val % 0x10);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (len >= max) {
|
||||
max *= 2;
|
||||
ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
|
||||
if (ret == NULL) {
|
||||
fprintf(stderr, "xmlSaveUri: out of memory\n");
|
||||
return(NULL);
|
||||
}
|
||||
}
|
||||
ret[len++] = 0;
|
||||
}
|
||||
return(ret);
|
||||
}
|
||||
|
||||
/**
|
||||
* xmlPrintURI:
|
||||
* @stream: a FILE* for the output
|
||||
@ -213,28 +506,12 @@ xmlCreateURI(void) {
|
||||
*/
|
||||
void
|
||||
xmlPrintURI(FILE *stream, xmlURIPtr uri) {
|
||||
if (uri == NULL) return;
|
||||
xmlChar *out;
|
||||
|
||||
/* TODO !!! URI encoding ... improper ! */
|
||||
if (uri->scheme != NULL)
|
||||
fprintf(stderr, "%s:", uri->scheme);
|
||||
if (uri->opaque != NULL) {
|
||||
fprintf(stderr, "%s", uri->opaque);
|
||||
} else {
|
||||
if (uri->authority != NULL)
|
||||
fprintf(stderr, "//%s", uri->authority);
|
||||
|
||||
/* TODO !!!
|
||||
if (uri->server != NULL) xmlFree(uri->server);
|
||||
*/
|
||||
|
||||
if (uri->path != NULL)
|
||||
fprintf(stderr, "%s", uri->path);
|
||||
|
||||
if (uri->query != NULL)
|
||||
fprintf(stderr, "?%s", uri->query);
|
||||
if (uri->fragment != NULL)
|
||||
fprintf(stderr, "#%s", uri->fragment);
|
||||
out = xmlSaveUri(uri);
|
||||
if (out != NULL) {
|
||||
fprintf(stream, "%s", out);
|
||||
xmlFree(out);
|
||||
}
|
||||
}
|
||||
|
||||
@ -292,6 +569,7 @@ xmlFreeURI(xmlURIPtr uri) {
|
||||
* @target: optional destination buffer
|
||||
*
|
||||
* Unescaping routine, does not do validity checks !
|
||||
* Output is direct unsigned char translation of %XX values (no encoding)
|
||||
*
|
||||
* Returns an copy of the string, but unescaped
|
||||
*/
|
||||
@ -333,6 +611,7 @@ xmlURIUnescape(const char *str, int len, char *target) {
|
||||
*out = *out * 16 + (*in - 'A') + 10;
|
||||
in++;
|
||||
len -= 3;
|
||||
out++;
|
||||
} else {
|
||||
*out++ = *in++;
|
||||
len--;
|
||||
@ -790,6 +1069,137 @@ xmlParseURIReference(xmlURIPtr uri, const char *str) {
|
||||
return(0);
|
||||
}
|
||||
|
||||
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the path normalization steps c) to g) of RFC 2396 section 5.2
 * step 6 to a path string. Normalization occurs directly on the string,
 * no new allocation is done; the result is never longer than the input.
 *
 * Everything up to and including the first '/' is left untouched, so a
 * path without any '/' is returned unchanged. After that first slash:
 *   - "." segments are removed,
 *   - "<segment>/.." pairs are removed when <segment> is not "..",
 *   - complete ".." segments still leading a path which starts with
 *     '/' are discarded.
 *
 * Returns 0 on success, -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    int start, cur, out;

    if (path == NULL)
        return(-1);

    /* locate the first segment following a slash */
    start = 0;
    while ((path[start] != 0) && (path[start] != '/')) start++;
    if (path[start] == 0)
        return(0);
    start++;

    /*
     * First pass, steps c) and d): drop every "." which is a complete
     * path segment, whether followed by a slash or ending the string.
     */
    cur = start;
    out = start;
    while (path[cur] != 0) {
        if (path[cur] == '.') {
            if (path[cur + 1] == '/') {
                cur += 2;
                continue;
            }
            if (path[cur + 1] == 0)
                break;
        }

        /* copy the segment and the slash closing it, if any */
        while ((path[cur] != 0) && (path[cur] != '/'))
            path[out++] = path[cur++];
        if (path[cur] == '/')
            path[out++] = path[cur++];
    }
    /* out never exceeds cur, so this stays inside the original string */
    path[out] = 0;

    /*
     * Second pass, steps e) and f): each ".." segment cancels the
     * segment written just before it, unless that one is itself ".."
     * or there is nothing left to cancel after the first slash.
     */
    cur = start;
    out = start;
    while (path[cur] != 0) {
        int dotdot = (path[cur] == '.') && (path[cur + 1] == '.') &&
                     ((path[cur + 2] == '/') || (path[cur + 2] == 0));

        if (dotdot && (out > start)) {
            /* here path[out - 1] is the slash closing the last segment */
            int prevdotdot = (out - start >= 3) &&
                             (path[out - 2] == '.') &&
                             (path[out - 3] == '.') &&
                             ((out - 3 == start) || (path[out - 4] == '/'));

            if (!prevdotdot) {
                out--;
                while ((out > start) && (path[out - 1] != '/')) out--;
                cur += 2;
                if (path[cur] == '/')
                    cur++;
                continue;
            }
        }

        while ((path[cur] != 0) && (path[cur] != '/'))
            path[out++] = path[cur++];
        if (path[cur] == '/')
            path[out++] = path[cur++];
    }
    path[out] = 0;

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may retain them,
     *    remove them, or avoid traversal of the reference.
     *
     * We discard them from the final path. Only complete ".." segments
     * are dropped: a segment such as "..foo" is kept.
     */
    cur = 0;
    while ((path[cur] == '/') && (path[cur + 1] == '.') &&
           (path[cur + 2] == '.') &&
           ((path[cur + 3] == '/') || (path[cur + 3] == 0)))
        cur += 3;
    if (cur != 0) {
        out = 0;
        while (path[cur] != 0) path[out++] = path[cur++];
        path[out] = 0;
    }
    return(0);
}
|
||||
|
||||
/**
|
||||
* xmlBuildURI:
|
||||
* @URI: the URI instance found in the document
|
||||
@ -802,22 +1212,195 @@ xmlParseURIReference(xmlURIPtr uri, const char *str) {
|
||||
*
|
||||
* 5.2. Resolving Relative References to Absolute Form
|
||||
*
|
||||
* Returns a new URI string (to be freed by the caller)
|
||||
* Returns a new URI string (to be freed by the caller) or NULL in case
|
||||
* of error.
|
||||
*/
|
||||
xmlChar *
|
||||
xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
|
||||
/* TODO */
|
||||
return(NULL);
|
||||
xmlChar *val = NULL;
|
||||
int ret, len, index, cur, out;
|
||||
xmlURIPtr ref = NULL;
|
||||
xmlURIPtr bas = NULL;
|
||||
xmlURIPtr res = NULL;
|
||||
|
||||
|
||||
/*
|
||||
* 1) The URI reference is parsed into the potential four components and
|
||||
* fragment identifier, as described in Section 4.3.
|
||||
*/
|
||||
ref = xmlCreateURI();
|
||||
if (ref == NULL)
|
||||
goto done;
|
||||
ret = xmlParseURIReference(ref, (const char *) URI);
|
||||
if (ret != 0)
|
||||
goto done;
|
||||
bas = xmlCreateURI();
|
||||
if (bas == NULL)
|
||||
goto done;
|
||||
ret = xmlParseURIReference(bas, (const char *) base);
|
||||
if (ret != 0)
|
||||
goto done;
|
||||
|
||||
/*
|
||||
* 2) If the path component is empty and the scheme, authority, and
|
||||
* query components are undefined, then it is a reference to the
|
||||
* current document and we are done. Otherwise, the reference URI's
|
||||
* query and fragment components are defined as found (or not found)
|
||||
* within the URI reference and not inherited from the base URI.
|
||||
*/
|
||||
res = xmlCreateURI();
|
||||
if (res == NULL)
|
||||
goto done;
|
||||
if ((ref->scheme == NULL) && (ref->path == NULL) &&
|
||||
(ref->authority == NULL) && (ref->query == NULL)) {
|
||||
if (ref->fragment == NULL)
|
||||
goto done;
|
||||
res->fragment = xmlMemStrdup(ref->fragment);
|
||||
val = xmlSaveUri(res);
|
||||
goto done;
|
||||
}
|
||||
|
||||
/*
|
||||
* 3) If the scheme component is defined, indicating that the reference
|
||||
* starts with a scheme name, then the reference is interpreted as an
|
||||
* absolute URI and we are done. Otherwise, the reference URI's
|
||||
* scheme is inherited from the base URI's scheme component.
|
||||
*/
|
||||
if (ref->scheme != NULL) {
|
||||
val = xmlSaveUri(ref);
|
||||
goto done;
|
||||
}
|
||||
res->scheme = xmlMemStrdup(bas->scheme);
|
||||
|
||||
/*
|
||||
* 4) If the authority component is defined, then the reference is a
|
||||
* network-path and we skip to step 7. Otherwise, the reference
|
||||
* URI's authority is inherited from the base URI's authority
|
||||
* component, which will also be undefined if the URI scheme does not
|
||||
* use an authority component.
|
||||
*/
|
||||
if (ref->authority != NULL) {
|
||||
res->authority = xmlMemStrdup(ref->authority);
|
||||
if (ref->path != NULL)
|
||||
res->path = xmlMemStrdup(ref->path);
|
||||
if (ref->query != NULL)
|
||||
res->query = xmlMemStrdup(ref->query);
|
||||
if (ref->fragment != NULL)
|
||||
res->fragment = xmlMemStrdup(ref->fragment);
|
||||
goto step_7;
|
||||
}
|
||||
if (bas->authority != NULL)
|
||||
res->authority = xmlMemStrdup(bas->authority);
|
||||
|
||||
/*
|
||||
* 5) If the path component begins with a slash character ("/"), then
|
||||
* the reference is an absolute-path and we skip to step 7.
|
||||
*/
|
||||
if ((ref->path != NULL) && (ref->path[0] == '/')) {
|
||||
res->path = xmlMemStrdup(ref->path);
|
||||
if (ref->query != NULL)
|
||||
res->query = xmlMemStrdup(ref->query);
|
||||
if (ref->fragment != NULL)
|
||||
res->fragment = xmlMemStrdup(ref->fragment);
|
||||
goto step_7;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* 6) If this step is reached, then we are resolving a relative-path
|
||||
* reference. The relative path needs to be merged with the base
|
||||
* URI's path. Although there are many ways to do this, we will
|
||||
* describe a simple method using a separate string buffer.
|
||||
*
|
||||
* Allocate a buffer large enough for the result string.
|
||||
*/
|
||||
len = 2; /* extra / and 0 */
|
||||
if (ref->path != NULL)
|
||||
len += strlen(ref->path);
|
||||
if (bas->path != NULL)
|
||||
len += strlen(bas->path);
|
||||
res->path = (char *) xmlMalloc(len);
|
||||
if (res->path == NULL) {
|
||||
fprintf(stderr, "xmlBuildURI: out of memory\n");
|
||||
goto done;
|
||||
}
|
||||
res->path[0] = 0;
|
||||
|
||||
/*
|
||||
* a) All but the last segment of the base URI's path component is
|
||||
* copied to the buffer. In other words, any characters after the
|
||||
* last (right-most) slash character, if any, are excluded.
|
||||
*/
|
||||
cur = 0;
|
||||
out = 0;
|
||||
if (bas->path != NULL) {
|
||||
while (bas->path[cur] != 0) {
|
||||
while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
|
||||
cur++;
|
||||
if (bas->path[cur] == 0)
|
||||
break;
|
||||
|
||||
cur++;
|
||||
while (out < cur) {
|
||||
res->path[out] = bas->path[out];
|
||||
out++;
|
||||
}
|
||||
}
|
||||
}
|
||||
res->path[out] = 0;
|
||||
|
||||
/*
|
||||
* b) The reference's path component is appended to the buffer
|
||||
* string.
|
||||
*/
|
||||
if (ref->path != NULL) {
|
||||
index = 0;
|
||||
while (ref->path[index] != 0) {
|
||||
res->path[out++] = ref->path[index++];
|
||||
}
|
||||
}
|
||||
res->path[out] = 0;
|
||||
|
||||
/*
|
||||
* Steps c) to h) are really path normalization steps
|
||||
*/
|
||||
xmlNormalizeURIPath(res->path);
|
||||
|
||||
step_7:
|
||||
|
||||
/*
|
||||
* 7) The resulting URI components, including any inherited from the
|
||||
* base URI, are recombined to give the absolute form of the URI
|
||||
* reference.
|
||||
*/
|
||||
val = xmlSaveUri(res);
|
||||
|
||||
done:
|
||||
if (ref != NULL)
|
||||
xmlFreeURI(ref);
|
||||
if (base != NULL)
|
||||
xmlFreeURI(bas);
|
||||
if (res != NULL)
|
||||
xmlFreeURI(res);
|
||||
return(val);
|
||||
}
|
||||
|
||||
|
||||
#ifdef STANDALONE
|
||||
int main(int argc, char **argv) {
|
||||
int i, ret;
|
||||
int i, ret, arg = 1;
|
||||
xmlURIPtr uri;
|
||||
const char *base = NULL;
|
||||
xmlChar *composite;
|
||||
|
||||
if ((!strcmp(argv[arg], "-base")) || (!strcmp(argv[arg], "--base"))) {
|
||||
arg++;
|
||||
base = argv[arg];
|
||||
if (base != NULL)
|
||||
arg++;
|
||||
}
|
||||
uri = xmlCreateURI();
|
||||
if (argc <= 1) {
|
||||
if (argv[arg] == NULL) {
|
||||
char str[1024];
|
||||
|
||||
while (1) {
|
||||
@ -850,17 +1433,28 @@ int main(int argc, char **argv) {
|
||||
|
||||
}
|
||||
} else {
|
||||
for (i = 1;i < argc;i++) {
|
||||
ret = xmlParseURIReference(uri, argv[i]);
|
||||
if (ret != 0)
|
||||
printf("%s : error %d\n", argv[i], ret);
|
||||
else {
|
||||
xmlPrintURI(stdout, uri);
|
||||
printf("\n");
|
||||
while (argv[arg] != NULL) {
|
||||
if (base == NULL) {
|
||||
ret = xmlParseURIReference(uri, argv[arg]);
|
||||
if (ret != 0)
|
||||
printf("%s : error %d\n", argv[arg], ret);
|
||||
else {
|
||||
xmlPrintURI(stdout, uri);
|
||||
printf("\n");
|
||||
}
|
||||
} else {
|
||||
composite = xmlBuildURI((xmlChar *)argv[arg], (xmlChar *) base);
|
||||
if (base == NULL) {
|
||||
} else {
|
||||
printf("%s\n", composite);
|
||||
xmlFree(composite);
|
||||
}
|
||||
}
|
||||
arg++;
|
||||
}
|
||||
}
|
||||
xmlFreeURI(uri);
|
||||
xmlMemoryDump();
|
||||
exit(0);
|
||||
}
|
||||
#endif
|
||||
|
8
uri.h
8
uri.h
@ -38,9 +38,11 @@ struct _xmlURI {
|
||||
* xmlChar * xmlNodeGetBase (xmlDocPtr doc,
|
||||
* xmlNodePtr cur);
|
||||
*/
|
||||
xmlChar * xmlBuildURI (const xmlChar *URI,
|
||||
const xmlChar *base);
|
||||
xmlURIPtr xmlParseURI (const char *URI);
|
||||
xmlChar * xmlBuildURI (const xmlChar *URI,
|
||||
const xmlChar *base);
|
||||
xmlURIPtr xmlParseURI (const char *URI);
|
||||
xmlChar * xmlSaveUri (xmlURIPtr uri);
|
||||
int xmlNormalizeURIPath (char *path);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user