2001-08-21 23:26:10 +08:00
|
|
|
/*
 * Parser interface for a DOM-based parser (libxml) rather than a
 * stream-based SAX-type parser.
 */
|
|
|
|
|
|
|
|
#include "postgres.h"
|
|
|
|
#include "fmgr.h"
|
|
|
|
|
|
|
|
/* libxml includes */
|
|
|
|
|
|
|
|
#include <libxml/xpath.h>
|
|
|
|
#include <libxml/tree.h>
|
|
|
|
#include <libxml/xmlmemory.h>
|
|
|
|
|
|
|
|
/* declarations */
|
|
|
|
|
|
|
|
static void *pgxml_palloc(size_t size);
|
|
|
|
static void *pgxml_repalloc(void *ptr, size_t size);
|
|
|
|
static void pgxml_pfree(void *ptr);
|
2001-10-25 13:50:21 +08:00
|
|
|
static char *pgxml_pstrdup(const char *string);
|
2001-08-21 23:26:10 +08:00
|
|
|
|
|
|
|
static void pgxml_parser_init();
|
|
|
|
|
2001-10-25 13:50:21 +08:00
|
|
|
static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset, xmlDocPtr doc,
|
|
|
|
xmlChar * toptagname, xmlChar * septagname,
|
2001-08-21 23:26:10 +08:00
|
|
|
int format);
|
|
|
|
|
|
|
|
static xmlChar *pgxml_texttoxmlchar(text *textstring);
|
|
|
|
|
|
|
|
|
2001-10-25 13:50:21 +08:00
|
|
|
Datum pgxml_parse(PG_FUNCTION_ARGS);
|
|
|
|
Datum pgxml_xpath(PG_FUNCTION_ARGS);
|
2001-08-21 23:26:10 +08:00
|
|
|
|
|
|
|
/*
 * Memory-handling passthrough functions (needed because e.g. palloc and
 * pstrdup are currently macros, and the others might become so).
 */
|
|
|
|
|
2001-10-25 13:50:21 +08:00
|
|
|
/*
 * pgxml_palloc
 *		Allocation passthrough: hands the request straight to palloc()
 *		and returns whatever palloc() returns.
 */
static void *
pgxml_palloc(size_t size)
{
	void	   *chunk = palloc(size);

	return chunk;
}
|
|
|
|
|
2001-10-25 13:50:21 +08:00
|
|
|
/*
 * pgxml_repalloc
 *		Reallocation passthrough: resizes "ptr" to "size" bytes using
 *		repalloc() and returns the (possibly moved) block.
 */
static void *
pgxml_repalloc(void *ptr, size_t size)
{
	void	   *resized = repalloc(ptr, size);

	return resized;
}
|
|
|
|
|
2001-10-25 13:50:21 +08:00
|
|
|
/*
 * pgxml_pfree
 *		Release passthrough: frees "ptr" with pfree().
 *
 * This function is installed as libxml's free hook (see the xmlMemSetup()
 * call in pgxml_parser_init), so it is expected to behave like free():
 * a NULL pointer must be accepted and ignored rather than passed on to
 * pfree().
 *
 * Note that a void function must not "return" the value of an expression,
 * so pfree() is simply called as a statement.
 */
static void
pgxml_pfree(void *ptr)
{
	if (ptr != NULL)
		pfree(ptr);
}
|
|
|
|
|
2001-10-25 13:50:21 +08:00
|
|
|
/*
 * pgxml_pstrdup
 *		String-duplication passthrough: returns the copy of "string"
 *		produced by pstrdup().
 */
static char *
pgxml_pstrdup(const char *string)
{
	char	   *duplicate = pstrdup(string);

	return duplicate;
}
|
|
|
|
|
2001-10-25 13:50:21 +08:00
|
|
|
static void
|
|
|
|
pgxml_parser_init()
|
2001-08-21 23:26:10 +08:00
|
|
|
{
|
2001-10-25 13:50:21 +08:00
|
|
|
/*
|
|
|
|
* This code should also set parser settings from user-supplied info.
|
|
|
|
* Quite how these settings are made is another matter :)
|
|
|
|
*/
|
2001-08-21 23:26:10 +08:00
|
|
|
|
2001-10-25 13:50:21 +08:00
|
|
|
xmlMemSetup(pgxml_pfree, pgxml_palloc, pgxml_repalloc, pgxml_pstrdup);
|
|
|
|
xmlInitParser();
|
2001-08-21 23:26:10 +08:00
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Returns true if document is well-formed */
|
|
|
|
|
|
|
|
PG_FUNCTION_INFO_V1(pgxml_parse);
|
|
|
|
|
|
|
|
Datum
|
|
|
|
pgxml_parse(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2001-10-25 13:50:21 +08:00
|
|
|
/* called as pgxml_parse(document) */
|
|
|
|
xmlDocPtr doctree;
|
|
|
|
text *t = PG_GETARG_TEXT_P(0); /* document buffer */
|
|
|
|
int32 docsize = VARSIZE(t) - VARHDRSZ;
|
|
|
|
|
|
|
|
pgxml_parser_init();
|
|
|
|
|
|
|
|
doctree = xmlParseMemory((char *) VARDATA(t), docsize);
|
|
|
|
if (doctree == NULL)
|
|
|
|
{
|
2003-08-04 08:43:34 +08:00
|
|
|
xmlCleanupParser();
|
2001-10-25 13:50:21 +08:00
|
|
|
PG_RETURN_BOOL(false); /* i.e. not well-formed */
|
|
|
|
}
|
2003-08-04 08:43:34 +08:00
|
|
|
xmlCleanupParser();
|
2001-10-25 13:50:21 +08:00
|
|
|
xmlFreeDoc(doctree);
|
|
|
|
PG_RETURN_BOOL(true);
|
2001-08-21 23:26:10 +08:00
|
|
|
}
|
|
|
|
|
2001-10-25 13:50:21 +08:00
|
|
|
static xmlChar
|
|
|
|
*
|
|
|
|
pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
|
|
|
|
xmlDocPtr doc,
|
|
|
|
xmlChar * toptagname,
|
|
|
|
xmlChar * septagname,
|
|
|
|
int format)
|
2001-08-21 23:26:10 +08:00
|
|
|
{
|
2001-10-25 13:50:21 +08:00
|
|
|
/* Function translates a nodeset into a text representation */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* iterates over each node in the set and calls xmlNodeDump to write
|
|
|
|
* it to an xmlBuffer -from which an xmlChar * string is returned.
|
|
|
|
*/
|
|
|
|
/* each representation is surrounded by <tagname> ... </tagname> */
|
|
|
|
/* if format==0, add a newline between nodes?? */
|
|
|
|
|
|
|
|
xmlBufferPtr buf;
|
|
|
|
xmlChar *result;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
buf = xmlBufferCreate();
|
|
|
|
|
|
|
|
if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
|
|
|
|
{
|
|
|
|
xmlBufferWriteChar(buf, "<");
|
|
|
|
xmlBufferWriteCHAR(buf, toptagname);
|
|
|
|
xmlBufferWriteChar(buf, ">");
|
|
|
|
}
|
|
|
|
if (nodeset != NULL)
|
|
|
|
{
|
|
|
|
for (i = 0; i < nodeset->nodeNr; i++)
|
|
|
|
{
|
|
|
|
if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
|
|
|
|
{
|
|
|
|
xmlBufferWriteChar(buf, "<");
|
|
|
|
xmlBufferWriteCHAR(buf, septagname);
|
|
|
|
xmlBufferWriteChar(buf, ">");
|
|
|
|
}
|
|
|
|
xmlNodeDump(buf, doc, nodeset->nodeTab[i], 1, (format == 2));
|
|
|
|
|
|
|
|
if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
|
|
|
|
{
|
|
|
|
xmlBufferWriteChar(buf, "</");
|
|
|
|
xmlBufferWriteCHAR(buf, septagname);
|
|
|
|
xmlBufferWriteChar(buf, ">");
|
|
|
|
}
|
|
|
|
if (format)
|
|
|
|
xmlBufferWriteChar(buf, "\n");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
|
|
|
|
{
|
|
|
|
xmlBufferWriteChar(buf, "</");
|
|
|
|
xmlBufferWriteCHAR(buf, toptagname);
|
|
|
|
xmlBufferWriteChar(buf, ">");
|
|
|
|
}
|
|
|
|
result = xmlStrdup(buf->content);
|
|
|
|
xmlBufferFree(buf);
|
|
|
|
return result;
|
2001-08-21 23:26:10 +08:00
|
|
|
}
|
|
|
|
|
2001-10-25 13:50:21 +08:00
|
|
|
static xmlChar *
|
|
|
|
pgxml_texttoxmlchar(text *textstring)
|
|
|
|
{
|
|
|
|
xmlChar *res;
|
|
|
|
int32 txsize;
|
|
|
|
|
|
|
|
txsize = VARSIZE(textstring) - VARHDRSZ;
|
|
|
|
res = (xmlChar *) palloc(txsize + 1);
|
|
|
|
memcpy((char *) res, VARDATA(textstring), txsize);
|
|
|
|
res[txsize] = '\0';
|
|
|
|
return res;
|
2001-08-21 23:26:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
PG_FUNCTION_INFO_V1(pgxml_xpath);
|
|
|
|
|
|
|
|
Datum
|
|
|
|
pgxml_xpath(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2001-10-25 13:50:21 +08:00
|
|
|
xmlDocPtr doctree;
|
|
|
|
xmlXPathContextPtr ctxt;
|
|
|
|
xmlXPathObjectPtr res;
|
|
|
|
xmlChar *xpath,
|
|
|
|
*xpresstr,
|
|
|
|
*toptag,
|
|
|
|
*septag;
|
|
|
|
xmlXPathCompExprPtr comppath;
|
|
|
|
|
|
|
|
int32 docsize,
|
|
|
|
ressize;
|
|
|
|
text *t,
|
|
|
|
*xpres;
|
|
|
|
|
|
|
|
t = PG_GETARG_TEXT_P(0); /* document buffer */
|
|
|
|
xpath = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(1)); /* XPath expression */
|
|
|
|
toptag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(2));
|
|
|
|
septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(3));
|
|
|
|
|
|
|
|
docsize = VARSIZE(t) - VARHDRSZ;
|
|
|
|
|
|
|
|
pgxml_parser_init();
|
|
|
|
|
|
|
|
doctree = xmlParseMemory((char *) VARDATA(t), docsize);
|
|
|
|
if (doctree == NULL)
|
2003-08-04 08:43:34 +08:00
|
|
|
{ /* not well-formed */
|
|
|
|
xmlCleanupParser();
|
2001-10-25 13:50:21 +08:00
|
|
|
PG_RETURN_NULL();
|
|
|
|
}
|
|
|
|
|
|
|
|
ctxt = xmlXPathNewContext(doctree);
|
|
|
|
ctxt->node = xmlDocGetRootElement(doctree);
|
|
|
|
|
|
|
|
/* compile the path */
|
|
|
|
comppath = xmlXPathCompile(xpath);
|
|
|
|
if (comppath == NULL)
|
|
|
|
{
|
2002-03-06 14:10:59 +08:00
|
|
|
elog(WARNING, "XPath syntax error");
|
2001-10-25 13:50:21 +08:00
|
|
|
xmlFreeDoc(doctree);
|
|
|
|
pfree((void *) xpath);
|
2002-12-06 11:44:14 +08:00
|
|
|
xmlCleanupParser();
|
2001-10-25 13:50:21 +08:00
|
|
|
PG_RETURN_NULL();
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Now evaluate the path expression. */
|
|
|
|
res = xmlXPathCompiledEval(comppath, ctxt);
|
|
|
|
xmlXPathFreeCompExpr(comppath);
|
|
|
|
|
|
|
|
if (res == NULL)
|
|
|
|
{
|
|
|
|
xmlFreeDoc(doctree);
|
|
|
|
pfree((void *) xpath);
|
2002-12-06 11:44:14 +08:00
|
|
|
xmlCleanupParser();
|
2001-10-25 13:50:21 +08:00
|
|
|
PG_RETURN_NULL(); /* seems appropriate */
|
|
|
|
}
|
|
|
|
/* now we dump this node, ?surrounding by tags? */
|
|
|
|
/* To do this, we look first at the type */
|
|
|
|
switch (res->type)
|
|
|
|
{
|
|
|
|
case XPATH_NODESET:
|
|
|
|
xpresstr = pgxmlNodeSetToText(res->nodesetval,
|
|
|
|
doctree,
|
|
|
|
toptag, septag, 0);
|
|
|
|
break;
|
|
|
|
case XPATH_STRING:
|
|
|
|
xpresstr = xmlStrdup(res->stringval);
|
|
|
|
break;
|
|
|
|
default:
|
2002-03-06 14:10:59 +08:00
|
|
|
elog(WARNING, "Unsupported XQuery result: %d", res->type);
|
2001-10-25 13:50:21 +08:00
|
|
|
xpresstr = xmlStrdup("<unsupported/>");
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Now convert this result back to text */
|
|
|
|
ressize = strlen(xpresstr);
|
|
|
|
xpres = (text *) palloc(ressize + VARHDRSZ);
|
|
|
|
memcpy(VARDATA(xpres), xpresstr, ressize);
|
|
|
|
VARATT_SIZEP(xpres) = ressize + VARHDRSZ;
|
|
|
|
|
|
|
|
/* Free various storage */
|
|
|
|
xmlFreeDoc(doctree);
|
|
|
|
pfree((void *) xpath);
|
|
|
|
xmlFree(xpresstr);
|
2002-12-06 11:44:14 +08:00
|
|
|
xmlCleanupParser();
|
2001-10-25 13:50:21 +08:00
|
|
|
PG_RETURN_TEXT_P(xpres);
|
2001-08-21 23:26:10 +08:00
|
|
|
}
|