mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-11-27 07:21:09 +08:00
948 lines
21 KiB
C
948 lines
21 KiB
C
/* Parser interface for DOM-based parser (libxml) rather than
|
|
stream-based SAX-type parser */
|
|
|
|
#include "postgres.h"
|
|
#include "fmgr.h"
|
|
#include "executor/spi.h"
|
|
#include "funcapi.h"
|
|
#include "miscadmin.h"
|
|
#include "lib/stringinfo.h"
|
|
|
|
/* libxml includes */
|
|
|
|
#include <libxml/xpath.h>
|
|
#include <libxml/tree.h>
|
|
#include <libxml/xmlmemory.h>
|
|
#include <libxml/xmlerror.h>
|
|
#include <libxml/parserInternals.h>
|
|
|
|
|
|
PG_MODULE_MAGIC;
|
|
|
|
/* declarations */
|
|
|
|
static void *pgxml_palloc(size_t size);
|
|
static void *pgxml_repalloc(void *ptr, size_t size);
|
|
static void pgxml_pfree(void *ptr);
|
|
static char *pgxml_pstrdup(const char *string);
|
|
static void pgxml_errorHandler(void *ctxt, const char *msg,...);
|
|
|
|
void elog_error(int level, char *explain, int force);
|
|
void pgxml_parser_init(void);
|
|
|
|
static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
|
|
xmlChar * toptagname, xmlChar * septagname,
|
|
xmlChar * plainsep);
|
|
|
|
text *pgxml_result_to_text(xmlXPathObjectPtr res, xmlChar * toptag,
|
|
xmlChar * septag, xmlChar * plainsep);
|
|
|
|
xmlChar *pgxml_texttoxmlchar(text *textstring);
|
|
|
|
static xmlXPathObjectPtr pgxml_xpath(text *document, xmlChar * xpath);
|
|
|
|
|
|
Datum xml_is_well_formed(PG_FUNCTION_ARGS);
|
|
Datum xml_encode_special_chars(PG_FUNCTION_ARGS);
|
|
Datum xpath_nodeset(PG_FUNCTION_ARGS);
|
|
Datum xpath_string(PG_FUNCTION_ARGS);
|
|
Datum xpath_number(PG_FUNCTION_ARGS);
|
|
Datum xpath_bool(PG_FUNCTION_ARGS);
|
|
Datum xpath_list(PG_FUNCTION_ARGS);
|
|
Datum xpath_table(PG_FUNCTION_ARGS);
|
|
|
|
/* Global variables */
|
|
char *errbuf; /* per line error buffer */
|
|
char *pgxml_errorMsg = NULL; /* overall error message */
|
|
|
|
/* Convenience macros */
|
|
|
|
#define GET_TEXT(cstrp) DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(cstrp)))
|
|
#define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
|
|
|
|
#define ERRBUF_SIZE 200
|
|
|
|
/* memory handling passthrough functions (e.g. palloc, pstrdup are
|
|
currently macros, and the others might become so...) */
|
|
|
|
/*
 * Allocation hook: forwards the request to palloc() so that memory is
 * obtained through the PostgreSQL allocator.
 */
static void *
pgxml_palloc(size_t size)
{
	void	   *chunk = palloc(size);

	return chunk;
}
|
|
|
|
/*
 * Reallocation hook: forwards the request to repalloc() and hands back
 * whatever pointer it returns.
 */
static void *
pgxml_repalloc(void *ptr, size_t size)
{
	void	   *resized = repalloc(ptr, size);

	return resized;
}
|
|
|
|
/*
 * Release hook: forwards the pointer to pfree().
 */
static void
pgxml_pfree(void *ptr)
{
	pfree(ptr);
	return;
}
|
|
|
|
/*
 * String duplication hook: forwards the string to pstrdup() and returns
 * the copy.
 */
static char *
pgxml_pstrdup(const char *string)
{
	char	   *duplicate = pstrdup(string);

	return duplicate;
}
|
|
|
|
/* The error handling function. This formats an error message and sets
|
|
* a flag - an ereport will be issued prior to return
|
|
*/
|
|
|
|
static void
|
|
pgxml_errorHandler(void *ctxt, const char *msg,...)
|
|
{
|
|
va_list args;
|
|
|
|
va_start(args, msg);
|
|
vsnprintf(errbuf, ERRBUF_SIZE, msg, args);
|
|
va_end(args);
|
|
/* Now copy the argument across */
|
|
if (pgxml_errorMsg == NULL)
|
|
pgxml_errorMsg = pstrdup(errbuf);
|
|
else
|
|
{
|
|
int32 xsize = strlen(pgxml_errorMsg);
|
|
|
|
pgxml_errorMsg = repalloc(pgxml_errorMsg,
|
|
(size_t) (xsize + strlen(errbuf) + 1));
|
|
strncpy(&pgxml_errorMsg[xsize - 1], errbuf, strlen(errbuf));
|
|
pgxml_errorMsg[xsize + strlen(errbuf) - 1] = '\0';
|
|
|
|
}
|
|
memset(errbuf, 0, ERRBUF_SIZE);
|
|
}
|
|
|
|
/* This function reports the current message at the level specified */
|
|
void
|
|
elog_error(int level, char *explain, int force)
|
|
{
|
|
if (force || (pgxml_errorMsg != NULL))
|
|
{
|
|
if (pgxml_errorMsg == NULL)
|
|
{
|
|
ereport(level, (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
|
|
errmsg(explain)));
|
|
}
|
|
else
|
|
{
|
|
ereport(level, (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
|
|
errmsg("%s:%s", explain, pgxml_errorMsg)));
|
|
pfree(pgxml_errorMsg);
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
pgxml_parser_init()
|
|
{
|
|
/*
|
|
* This code could also set parser settings from user-supplied info.
|
|
* Quite how these settings are made is another matter :)
|
|
*/
|
|
|
|
xmlMemSetup(pgxml_pfree, pgxml_palloc, pgxml_repalloc, pgxml_pstrdup);
|
|
xmlInitParser();
|
|
|
|
xmlSetGenericErrorFunc(NULL, pgxml_errorHandler);
|
|
|
|
xmlSubstituteEntitiesDefault(1);
|
|
xmlLoadExtDtdDefaultValue = 1;
|
|
|
|
pgxml_errorMsg = NULL;
|
|
|
|
errbuf = palloc(200);
|
|
memset(errbuf, 0, 200);
|
|
|
|
}
|
|
|
|
|
|
/* Returns true if document is well-formed */
|
|
|
|
PG_FUNCTION_INFO_V1(xml_is_well_formed);
|
|
|
|
Datum
|
|
xml_is_well_formed(PG_FUNCTION_ARGS)
|
|
{
|
|
/* called as xml_is_well_formed(document) */
|
|
xmlDocPtr doctree;
|
|
text *t = PG_GETARG_TEXT_P(0); /* document buffer */
|
|
int32 docsize = VARSIZE(t) - VARHDRSZ;
|
|
|
|
pgxml_parser_init();
|
|
|
|
doctree = xmlParseMemory((char *) VARDATA(t), docsize);
|
|
if (doctree == NULL)
|
|
{
|
|
xmlCleanupParser();
|
|
PG_RETURN_BOOL(false); /* i.e. not well-formed */
|
|
}
|
|
xmlCleanupParser();
|
|
xmlFreeDoc(doctree);
|
|
PG_RETURN_BOOL(true);
|
|
}
|
|
|
|
|
|
/* Encodes special characters (<, >, &, " and \r) as XML entities */
|
|
|
|
PG_FUNCTION_INFO_V1(xml_encode_special_chars);
|
|
|
|
Datum
|
|
xml_encode_special_chars(PG_FUNCTION_ARGS)
|
|
{
|
|
text *tin = PG_GETARG_TEXT_P(0);
|
|
text *tout;
|
|
int32 ressize;
|
|
xmlChar *ts,
|
|
*tt;
|
|
|
|
ts = pgxml_texttoxmlchar(tin);
|
|
|
|
tt = xmlEncodeSpecialChars(NULL, ts);
|
|
|
|
pfree(ts);
|
|
|
|
ressize = strlen(tt);
|
|
tout = (text *) palloc(ressize + VARHDRSZ);
|
|
memcpy(VARDATA(tout), tt, ressize);
|
|
VARATT_SIZEP(tout) = ressize + VARHDRSZ;
|
|
|
|
xmlFree(tt);
|
|
|
|
PG_RETURN_TEXT_P(tout);
|
|
}
|
|
|
|
static xmlChar
|
|
*
|
|
pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
|
|
xmlChar * toptagname,
|
|
xmlChar * septagname,
|
|
xmlChar * plainsep)
|
|
{
|
|
/* Function translates a nodeset into a text representation */
|
|
|
|
/*
|
|
* iterates over each node in the set and calls xmlNodeDump to write it to
|
|
* an xmlBuffer -from which an xmlChar * string is returned.
|
|
*/
|
|
|
|
/* each representation is surrounded by <tagname> ... </tagname> */
|
|
|
|
/*
|
|
* plainsep is an ordinary (not tag) seperator - if used, then nodes are
|
|
* cast to string as output method
|
|
*/
|
|
|
|
|
|
xmlBufferPtr buf;
|
|
xmlChar *result;
|
|
int i;
|
|
|
|
buf = xmlBufferCreate();
|
|
|
|
if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
|
|
{
|
|
xmlBufferWriteChar(buf, "<");
|
|
xmlBufferWriteCHAR(buf, toptagname);
|
|
xmlBufferWriteChar(buf, ">");
|
|
}
|
|
if (nodeset != NULL)
|
|
{
|
|
for (i = 0; i < nodeset->nodeNr; i++)
|
|
{
|
|
|
|
if (plainsep != NULL)
|
|
{
|
|
xmlBufferWriteCHAR(buf,
|
|
xmlXPathCastNodeToString(nodeset->nodeTab[i]));
|
|
|
|
/* If this isn't the last entry, write the plain sep. */
|
|
if (i < (nodeset->nodeNr) - 1)
|
|
xmlBufferWriteChar(buf, plainsep);
|
|
}
|
|
else
|
|
{
|
|
|
|
|
|
if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
|
|
{
|
|
xmlBufferWriteChar(buf, "<");
|
|
xmlBufferWriteCHAR(buf, septagname);
|
|
xmlBufferWriteChar(buf, ">");
|
|
}
|
|
xmlNodeDump(buf,
|
|
nodeset->nodeTab[i]->doc,
|
|
nodeset->nodeTab[i],
|
|
1, 0);
|
|
|
|
if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
|
|
{
|
|
xmlBufferWriteChar(buf, "</");
|
|
xmlBufferWriteCHAR(buf, septagname);
|
|
xmlBufferWriteChar(buf, ">");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
|
|
{
|
|
xmlBufferWriteChar(buf, "</");
|
|
xmlBufferWriteCHAR(buf, toptagname);
|
|
xmlBufferWriteChar(buf, ">");
|
|
}
|
|
result = xmlStrdup(buf->content);
|
|
xmlBufferFree(buf);
|
|
return result;
|
|
}
|
|
|
|
|
|
/* Translate a PostgreSQL "varlena" -i.e. a variable length parameter
|
|
* into the libxml2 representation
|
|
*/
|
|
|
|
xmlChar *
|
|
pgxml_texttoxmlchar(text *textstring)
|
|
{
|
|
xmlChar *res;
|
|
int32 txsize;
|
|
|
|
txsize = VARSIZE(textstring) - VARHDRSZ;
|
|
res = (xmlChar *) palloc(txsize + 1);
|
|
memcpy((char *) res, VARDATA(textstring), txsize);
|
|
res[txsize] = '\0';
|
|
return res;
|
|
}
|
|
|
|
/* Publicly visible XPath functions */
|
|
|
|
/* This is a "raw" xpath function. Check that it returns child elements
|
|
* properly
|
|
*/
|
|
|
|
PG_FUNCTION_INFO_V1(xpath_nodeset);
|
|
|
|
Datum
|
|
xpath_nodeset(PG_FUNCTION_ARGS)
|
|
{
|
|
xmlChar *xpath,
|
|
*toptag,
|
|
*septag;
|
|
int32 pathsize;
|
|
text
|
|
*xpathsupp,
|
|
*xpres;
|
|
|
|
/* PG_GETARG_TEXT_P(0) is document buffer */
|
|
xpathsupp = PG_GETARG_TEXT_P(1); /* XPath expression */
|
|
|
|
toptag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(2));
|
|
septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(3));
|
|
|
|
pathsize = VARSIZE(xpathsupp) - VARHDRSZ;
|
|
|
|
xpath = pgxml_texttoxmlchar(xpathsupp);
|
|
|
|
xpres = pgxml_result_to_text(
|
|
pgxml_xpath(PG_GETARG_TEXT_P(0), xpath),
|
|
toptag, septag, NULL);
|
|
|
|
/* xmlCleanupParser(); done by result_to_text routine */
|
|
pfree(xpath);
|
|
|
|
if (xpres == NULL)
|
|
PG_RETURN_NULL();
|
|
PG_RETURN_TEXT_P(xpres);
|
|
}
|
|
|
|
/* The following function is almost identical, but returns the elements in */
|
|
/* a list. */
|
|
|
|
PG_FUNCTION_INFO_V1(xpath_list);
|
|
|
|
Datum
|
|
xpath_list(PG_FUNCTION_ARGS)
|
|
{
|
|
xmlChar *xpath,
|
|
*plainsep;
|
|
int32 pathsize;
|
|
text
|
|
*xpathsupp,
|
|
*xpres;
|
|
|
|
/* PG_GETARG_TEXT_P(0) is document buffer */
|
|
xpathsupp = PG_GETARG_TEXT_P(1); /* XPath expression */
|
|
|
|
plainsep = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(2));
|
|
|
|
pathsize = VARSIZE(xpathsupp) - VARHDRSZ;
|
|
|
|
xpath = pgxml_texttoxmlchar(xpathsupp);
|
|
|
|
xpres = pgxml_result_to_text(
|
|
pgxml_xpath(PG_GETARG_TEXT_P(0), xpath),
|
|
NULL, NULL, plainsep);
|
|
|
|
/* xmlCleanupParser(); done by result_to_text routine */
|
|
pfree(xpath);
|
|
|
|
if (xpres == NULL)
|
|
PG_RETURN_NULL();
|
|
PG_RETURN_TEXT_P(xpres);
|
|
}
|
|
|
|
|
|
PG_FUNCTION_INFO_V1(xpath_string);
|
|
|
|
Datum
|
|
xpath_string(PG_FUNCTION_ARGS)
|
|
{
|
|
xmlChar *xpath;
|
|
int32 pathsize;
|
|
text
|
|
*xpathsupp,
|
|
*xpres;
|
|
|
|
/* PG_GETARG_TEXT_P(0) is document buffer */
|
|
xpathsupp = PG_GETARG_TEXT_P(1); /* XPath expression */
|
|
|
|
pathsize = VARSIZE(xpathsupp) - VARHDRSZ;
|
|
|
|
/*
|
|
* We encapsulate the supplied path with "string()" = 8 chars + 1 for NUL
|
|
* at end
|
|
*/
|
|
/* We could try casting to string using the libxml function? */
|
|
|
|
xpath = (xmlChar *) palloc(pathsize + 9);
|
|
memcpy((char *) (xpath + 7), VARDATA(xpathsupp), pathsize);
|
|
strncpy((char *) xpath, "string(", 7);
|
|
xpath[pathsize + 7] = ')';
|
|
xpath[pathsize + 8] = '\0';
|
|
|
|
xpres = pgxml_result_to_text(
|
|
pgxml_xpath(PG_GETARG_TEXT_P(0), xpath),
|
|
NULL, NULL, NULL);
|
|
|
|
xmlCleanupParser();
|
|
pfree(xpath);
|
|
|
|
if (xpres == NULL)
|
|
PG_RETURN_NULL();
|
|
PG_RETURN_TEXT_P(xpres);
|
|
}
|
|
|
|
|
|
PG_FUNCTION_INFO_V1(xpath_number);
|
|
|
|
Datum
|
|
xpath_number(PG_FUNCTION_ARGS)
|
|
{
|
|
xmlChar *xpath;
|
|
int32 pathsize;
|
|
text
|
|
*xpathsupp;
|
|
|
|
float4 fRes;
|
|
|
|
xmlXPathObjectPtr res;
|
|
|
|
/* PG_GETARG_TEXT_P(0) is document buffer */
|
|
xpathsupp = PG_GETARG_TEXT_P(1); /* XPath expression */
|
|
|
|
pathsize = VARSIZE(xpathsupp) - VARHDRSZ;
|
|
|
|
xpath = pgxml_texttoxmlchar(xpathsupp);
|
|
|
|
res = pgxml_xpath(PG_GETARG_TEXT_P(0), xpath);
|
|
pfree(xpath);
|
|
|
|
if (res == NULL)
|
|
{
|
|
xmlCleanupParser();
|
|
PG_RETURN_NULL();
|
|
}
|
|
|
|
fRes = xmlXPathCastToNumber(res);
|
|
xmlCleanupParser();
|
|
if (xmlXPathIsNaN(fRes))
|
|
PG_RETURN_NULL();
|
|
|
|
PG_RETURN_FLOAT4(fRes);
|
|
|
|
}
|
|
|
|
|
|
PG_FUNCTION_INFO_V1(xpath_bool);
|
|
|
|
Datum
|
|
xpath_bool(PG_FUNCTION_ARGS)
|
|
{
|
|
xmlChar *xpath;
|
|
int32 pathsize;
|
|
text
|
|
*xpathsupp;
|
|
|
|
int bRes;
|
|
|
|
xmlXPathObjectPtr res;
|
|
|
|
/* PG_GETARG_TEXT_P(0) is document buffer */
|
|
xpathsupp = PG_GETARG_TEXT_P(1); /* XPath expression */
|
|
|
|
pathsize = VARSIZE(xpathsupp) - VARHDRSZ;
|
|
|
|
xpath = pgxml_texttoxmlchar(xpathsupp);
|
|
|
|
res = pgxml_xpath(PG_GETARG_TEXT_P(0), xpath);
|
|
pfree(xpath);
|
|
|
|
if (res == NULL)
|
|
{
|
|
xmlCleanupParser();
|
|
PG_RETURN_BOOL(false);
|
|
}
|
|
|
|
bRes = xmlXPathCastToBoolean(res);
|
|
xmlCleanupParser();
|
|
PG_RETURN_BOOL(bRes);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Core function to evaluate XPath query */
|
|
|
|
xmlXPathObjectPtr
|
|
pgxml_xpath(text *document, xmlChar * xpath)
|
|
{
|
|
|
|
xmlDocPtr doctree;
|
|
xmlXPathContextPtr ctxt;
|
|
xmlXPathObjectPtr res;
|
|
|
|
xmlXPathCompExprPtr comppath;
|
|
|
|
int32 docsize;
|
|
|
|
|
|
docsize = VARSIZE(document) - VARHDRSZ;
|
|
|
|
pgxml_parser_init();
|
|
|
|
doctree = xmlParseMemory((char *) VARDATA(document), docsize);
|
|
if (doctree == NULL)
|
|
{ /* not well-formed */
|
|
return NULL;
|
|
}
|
|
|
|
ctxt = xmlXPathNewContext(doctree);
|
|
ctxt->node = xmlDocGetRootElement(doctree);
|
|
|
|
|
|
/* compile the path */
|
|
comppath = xmlXPathCompile(xpath);
|
|
if (comppath == NULL)
|
|
{
|
|
xmlCleanupParser();
|
|
xmlFreeDoc(doctree);
|
|
elog_error(ERROR, "XPath Syntax Error", 1);
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/* Now evaluate the path expression. */
|
|
res = xmlXPathCompiledEval(comppath, ctxt);
|
|
xmlXPathFreeCompExpr(comppath);
|
|
|
|
if (res == NULL)
|
|
{
|
|
xmlXPathFreeContext(ctxt);
|
|
/* xmlCleanupParser(); */
|
|
xmlFreeDoc(doctree);
|
|
|
|
return NULL;
|
|
}
|
|
/* xmlFreeDoc(doctree); */
|
|
return res;
|
|
}
|
|
|
|
text
|
|
*
|
|
pgxml_result_to_text(xmlXPathObjectPtr res,
|
|
xmlChar * toptag,
|
|
xmlChar * septag,
|
|
xmlChar * plainsep)
|
|
{
|
|
xmlChar *xpresstr;
|
|
int32 ressize;
|
|
text *xpres;
|
|
|
|
if (res == NULL)
|
|
{
|
|
xmlCleanupParser();
|
|
return NULL;
|
|
}
|
|
switch (res->type)
|
|
{
|
|
case XPATH_NODESET:
|
|
xpresstr = pgxmlNodeSetToText(res->nodesetval,
|
|
toptag,
|
|
septag, plainsep);
|
|
break;
|
|
|
|
case XPATH_STRING:
|
|
xpresstr = xmlStrdup(res->stringval);
|
|
break;
|
|
|
|
default:
|
|
elog(NOTICE, "unsupported XQuery result: %d", res->type);
|
|
xpresstr = xmlStrdup("<unsupported/>");
|
|
}
|
|
|
|
|
|
/* Now convert this result back to text */
|
|
ressize = strlen(xpresstr);
|
|
xpres = (text *) palloc(ressize + VARHDRSZ);
|
|
memcpy(VARDATA(xpres), xpresstr, ressize);
|
|
VARATT_SIZEP(xpres) = ressize + VARHDRSZ;
|
|
|
|
/* Free various storage */
|
|
xmlCleanupParser();
|
|
/* xmlFreeDoc(doctree); -- will die at end of tuple anyway */
|
|
|
|
xmlFree(xpresstr);
|
|
|
|
elog_error(ERROR, "XPath error", 0);
|
|
|
|
|
|
return xpres;
|
|
}
|
|
|
|
/* xpath_table is a table function. It needs some tidying (as do the
|
|
* other functions here!
|
|
*/
|
|
|
|
PG_FUNCTION_INFO_V1(xpath_table);
|
|
|
|
Datum
|
|
xpath_table(PG_FUNCTION_ARGS)
|
|
{
|
|
/* SPI (input tuple) support */
|
|
SPITupleTable *tuptable;
|
|
HeapTuple spi_tuple;
|
|
TupleDesc spi_tupdesc;
|
|
|
|
/* Output tuple (tuplestore) support */
|
|
Tuplestorestate *tupstore = NULL;
|
|
TupleDesc ret_tupdesc;
|
|
HeapTuple ret_tuple;
|
|
|
|
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
|
|
AttInMetadata *attinmeta;
|
|
MemoryContext per_query_ctx;
|
|
MemoryContext oldcontext;
|
|
|
|
/* Function parameters */
|
|
char *pkeyfield = GET_STR(PG_GETARG_TEXT_P(0));
|
|
char *xmlfield = GET_STR(PG_GETARG_TEXT_P(1));
|
|
char *relname = GET_STR(PG_GETARG_TEXT_P(2));
|
|
char *xpathset = GET_STR(PG_GETARG_TEXT_P(3));
|
|
char *condition = GET_STR(PG_GETARG_TEXT_P(4));
|
|
|
|
char **values;
|
|
xmlChar **xpaths;
|
|
xmlChar *pos;
|
|
xmlChar *pathsep = "|";
|
|
|
|
int numpaths;
|
|
int ret;
|
|
int proc;
|
|
int i;
|
|
int j;
|
|
int rownr; /* For issuing multiple rows from one original
|
|
* document */
|
|
int had_values; /* To determine end of nodeset results */
|
|
|
|
StringInfoData query_buf;
|
|
|
|
/* We only have a valid tuple description in table function mode */
|
|
if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
errmsg("set-valued function called in context that cannot accept a set")));
|
|
if (rsinfo->expectedDesc == NULL)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
errmsg("xpath_table must be called as a table function")));
|
|
|
|
/*
|
|
* We want to materialise because it means that we don't have to carry
|
|
* libxml2 parser state between invocations of this function
|
|
*/
|
|
if (!(rsinfo->allowedModes & SFRM_Materialize))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
errmsg("xpath_table requires Materialize mode, but it is not "
|
|
"allowed in this context")));
|
|
|
|
/*
|
|
* The tuplestore must exist in a higher context than this function call
|
|
* (per_query_ctx is used)
|
|
*/
|
|
|
|
per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
|
|
oldcontext = MemoryContextSwitchTo(per_query_ctx);
|
|
|
|
/*
|
|
* Create the tuplestore - work_mem is the max in-memory size before a
|
|
* file is created on disk to hold it.
|
|
*/
|
|
tupstore = tuplestore_begin_heap(true, false, work_mem);
|
|
|
|
MemoryContextSwitchTo(oldcontext);
|
|
|
|
/* get the requested return tuple description */
|
|
ret_tupdesc = CreateTupleDescCopy(rsinfo->expectedDesc);
|
|
|
|
/*
|
|
* At the moment we assume that the returned attributes make sense for the
|
|
* XPath specififed (i.e. we trust the caller). It's not fatal if they get
|
|
* it wrong - the input function for the column type will raise an error
|
|
* if the path result can't be converted into the correct binary
|
|
* representation.
|
|
*/
|
|
|
|
attinmeta = TupleDescGetAttInMetadata(ret_tupdesc);
|
|
|
|
/* Set return mode and allocate value space. */
|
|
rsinfo->returnMode = SFRM_Materialize;
|
|
rsinfo->setDesc = ret_tupdesc;
|
|
|
|
values = (char **) palloc(ret_tupdesc->natts * sizeof(char *));
|
|
|
|
xpaths = (xmlChar **) palloc(ret_tupdesc->natts * sizeof(xmlChar *));
|
|
|
|
/* Split XPaths. xpathset is a writable CString. */
|
|
|
|
/* Note that we stop splitting once we've done all needed for tupdesc */
|
|
|
|
numpaths = 0;
|
|
pos = xpathset;
|
|
do
|
|
{
|
|
xpaths[numpaths] = pos;
|
|
pos = strstr(pos, pathsep);
|
|
if (pos != NULL)
|
|
{
|
|
*pos = '\0';
|
|
pos++;
|
|
}
|
|
numpaths++;
|
|
} while ((pos != NULL) && (numpaths < (ret_tupdesc->natts - 1)));
|
|
|
|
/* Now build query */
|
|
initStringInfo(&query_buf);
|
|
|
|
/* Build initial sql statement */
|
|
appendStringInfo(&query_buf, "SELECT %s, %s FROM %s WHERE %s",
|
|
pkeyfield,
|
|
xmlfield,
|
|
relname,
|
|
condition
|
|
);
|
|
|
|
|
|
if ((ret = SPI_connect()) < 0)
|
|
elog(ERROR, "xpath_table: SPI_connect returned %d", ret);
|
|
|
|
if ((ret = SPI_exec(query_buf.data, 0)) != SPI_OK_SELECT)
|
|
elog(ERROR, "xpath_table: SPI execution failed for query %s", query_buf.data);
|
|
|
|
proc = SPI_processed;
|
|
/* elog(DEBUG1,"xpath_table: SPI returned %d rows",proc); */
|
|
tuptable = SPI_tuptable;
|
|
spi_tupdesc = tuptable->tupdesc;
|
|
|
|
/* Switch out of SPI context */
|
|
MemoryContextSwitchTo(oldcontext);
|
|
|
|
|
|
/* Check that SPI returned correct result. If you put a comma into one of
|
|
* the function parameters, this will catch it when the SPI query returns
|
|
* e.g. 3 columns.
|
|
*/
|
|
|
|
if (spi_tupdesc->natts != 2)
|
|
{
|
|
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("expression returning multiple columns is not valid in parameter list"),
|
|
errdetail("Expected two columns in SPI result, got %d.", spi_tupdesc->natts)));
|
|
}
|
|
|
|
/* Setup the parser. Beware that this must happen in the same context as the
|
|
* cleanup - which means that any error from here on must do cleanup to
|
|
* ensure that the entity table doesn't get freed by being out of context.
|
|
*/
|
|
pgxml_parser_init();
|
|
|
|
/* For each row i.e. document returned from SPI */
|
|
for (i = 0; i < proc; i++)
|
|
{
|
|
char *pkey;
|
|
char *xmldoc;
|
|
|
|
xmlDocPtr doctree;
|
|
xmlXPathContextPtr ctxt;
|
|
xmlXPathObjectPtr res;
|
|
xmlChar *resstr;
|
|
|
|
|
|
xmlXPathCompExprPtr comppath;
|
|
|
|
/* Extract the row data as C Strings */
|
|
|
|
spi_tuple = tuptable->vals[i];
|
|
pkey = SPI_getvalue(spi_tuple, spi_tupdesc, 1);
|
|
xmldoc = SPI_getvalue(spi_tuple, spi_tupdesc, 2);
|
|
|
|
|
|
/*
|
|
* Clear the values array, so that not-well-formed documents return
|
|
* NULL in all columns.
|
|
*/
|
|
|
|
/* Note that this also means that spare columns will be NULL. */
|
|
for (j = 0; j < ret_tupdesc->natts; j++)
|
|
values[j] = NULL;
|
|
|
|
/* Insert primary key */
|
|
values[0] = pkey;
|
|
|
|
/* Parse the document */
|
|
doctree = xmlParseMemory(xmldoc, strlen(xmldoc));
|
|
|
|
if (doctree == NULL)
|
|
{ /* not well-formed, so output all-NULL tuple */
|
|
|
|
ret_tuple = BuildTupleFromCStrings(attinmeta, values);
|
|
oldcontext = MemoryContextSwitchTo(per_query_ctx);
|
|
tuplestore_puttuple(tupstore, ret_tuple);
|
|
MemoryContextSwitchTo(oldcontext);
|
|
heap_freetuple(ret_tuple);
|
|
}
|
|
else
|
|
{
|
|
/* New loop here - we have to deal with nodeset results */
|
|
rownr = 0;
|
|
|
|
do
|
|
{
|
|
/* Now evaluate the set of xpaths. */
|
|
had_values = 0;
|
|
for (j = 0; j < numpaths; j++)
|
|
{
|
|
|
|
ctxt = xmlXPathNewContext(doctree);
|
|
ctxt->node = xmlDocGetRootElement(doctree);
|
|
xmlSetGenericErrorFunc(ctxt, pgxml_errorHandler);
|
|
|
|
/* compile the path */
|
|
comppath = xmlXPathCompile(xpaths[j]);
|
|
if (comppath == NULL)
|
|
{
|
|
xmlCleanupParser();
|
|
xmlFreeDoc(doctree);
|
|
|
|
elog_error(ERROR, "XPath Syntax Error", 1);
|
|
|
|
PG_RETURN_NULL(); /* Keep compiler happy */
|
|
}
|
|
|
|
/* Now evaluate the path expression. */
|
|
res = xmlXPathCompiledEval(comppath, ctxt);
|
|
xmlXPathFreeCompExpr(comppath);
|
|
|
|
if (res != NULL)
|
|
{
|
|
switch (res->type)
|
|
{
|
|
case XPATH_NODESET:
|
|
/* We see if this nodeset has enough nodes */
|
|
if ((res->nodesetval != NULL) && (rownr < res->nodesetval->nodeNr))
|
|
{
|
|
resstr =
|
|
xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]);
|
|
had_values = 1;
|
|
}
|
|
else
|
|
resstr = NULL;
|
|
|
|
break;
|
|
|
|
case XPATH_STRING:
|
|
resstr = xmlStrdup(res->stringval);
|
|
break;
|
|
|
|
default:
|
|
elog(NOTICE, "unsupported XQuery result: %d", res->type);
|
|
resstr = xmlStrdup("<unsupported/>");
|
|
}
|
|
|
|
|
|
/*
|
|
* Insert this into the appropriate column in the
|
|
* result tuple.
|
|
*/
|
|
values[j + 1] = resstr;
|
|
}
|
|
xmlXPathFreeContext(ctxt);
|
|
}
|
|
/* Now add the tuple to the output, if there is one. */
|
|
if (had_values)
|
|
{
|
|
ret_tuple = BuildTupleFromCStrings(attinmeta, values);
|
|
oldcontext = MemoryContextSwitchTo(per_query_ctx);
|
|
tuplestore_puttuple(tupstore, ret_tuple);
|
|
MemoryContextSwitchTo(oldcontext);
|
|
heap_freetuple(ret_tuple);
|
|
}
|
|
|
|
rownr++;
|
|
|
|
} while (had_values);
|
|
|
|
}
|
|
|
|
xmlFreeDoc(doctree);
|
|
|
|
pfree(pkey);
|
|
pfree(xmldoc);
|
|
}
|
|
|
|
xmlCleanupParser();
|
|
/* Needed to flag completeness in 7.3.1. 7.4 defines it as a no-op. */
|
|
tuplestore_donestoring(tupstore);
|
|
|
|
SPI_finish();
|
|
|
|
rsinfo->setResult = tupstore;
|
|
|
|
/*
|
|
* SFRM_Materialize mode expects us to return a NULL Datum. The actual
|
|
* tuples are in our tuplestore and passed back through rsinfo->setResult.
|
|
* rsinfo->setDesc is set to the tuple description that we actually used
|
|
* to build our tuples with, so the caller can verify we did what it was
|
|
* expecting.
|
|
*/
|
|
return (Datum) 0;
|
|
|
|
}
|