mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-27 08:39:28 +08:00
92288a1cf9
o Change all current CVS messages of NOTICE to WARNING. We were going to do this just before 7.3 beta but it has to be done now, as you will see below. o Change current INFO messages that should be controlled by client_min_messages to NOTICE. o Force remaining INFO messages, like from EXPLAIN, VACUUM VERBOSE, etc. to always go to the client. o Remove INFO from the client_min_messages options and add NOTICE. Seems we do need three non-ERROR elog levels to handle the various behaviors we need for these messages. Regression passed.
348 lines
6.9 KiB
C
348 lines
6.9 KiB
C
/********************************************************
|
|
* Interface code to parse an XML document using expat
|
|
********************************************************/
|
|
|
|
#include "postgres.h"
|
|
#include "fmgr.h"
|
|
|
|
#include "expat.h"
|
|
#include "pgxml.h"
|
|
|
|
/* Memory management - we make expat use standard pg MM */
|
|
|
|
/*
 * Allocator callback table passed to XML_ParserCreate_MM(); its members
 * are filled in by pgxml_mhs_init() before each parser is created.
 */
XML_Memory_Handling_Suite mhs;
|
|
|
|
/* passthrough functions (palloc is a macro) */
|
|
|
|
/*
 * Allocation callback handed to expat.  It simply forwards to palloc();
 * a real function is required because palloc is a macro and expat needs
 * a function pointer.
 */
static void *
pgxml_palloc(size_t size)
{
	void	   *chunk = palloc(size);

	return chunk;
}
|
|
|
|
/*
 * Reallocation callback handed to expat; forwards to repalloc() and
 * returns whatever it returns.
 */
static void *
pgxml_repalloc(void *ptr, size_t size)
{
	void	   *resized = repalloc(ptr, size);

	return resized;
}
|
|
|
|
/*
 * Release callback handed to expat; forwards to pfree().
 *
 * The call is a plain statement: a void function must not use
 * "return <expression>;" in C, even when the expression is itself void.
 */
static void
pgxml_pfree(void *ptr)
{
	pfree(ptr);
}
|
|
|
|
static void
|
|
pgxml_mhs_init()
|
|
{
|
|
mhs.malloc_fcn = pgxml_palloc;
|
|
mhs.realloc_fcn = pgxml_repalloc;
|
|
mhs.free_fcn = pgxml_pfree;
|
|
}
|
|
|
|
/*
 * Placeholder: intended to install the relevant expat handlers according
 * to user-supplied settings.  How those settings get supplied is still
 * undecided, so for now this does nothing.
 */
static void
pgxml_handler_init()
{
	/* nothing to set up yet */
}
|
|
|
|
/* Returns true if document is well-formed */
|
|
|
|
PG_FUNCTION_INFO_V1(pgxml_parse);
|
|
|
|
Datum
|
|
pgxml_parse(PG_FUNCTION_ARGS)
|
|
{
|
|
/* called as pgxml_parse(document) */
|
|
XML_Parser p;
|
|
text *t = PG_GETARG_TEXT_P(0); /* document buffer */
|
|
int32 docsize = VARSIZE(t) - VARHDRSZ;
|
|
|
|
pgxml_mhs_init();
|
|
|
|
pgxml_handler_init();
|
|
|
|
p = XML_ParserCreate_MM(NULL, &mhs, NULL);
|
|
if (!p)
|
|
{
|
|
elog(ERROR, "pgxml: Could not create expat parser");
|
|
PG_RETURN_NULL(); /* seems appropriate if we couldn't parse */
|
|
}
|
|
|
|
if (!XML_Parse(p, (char *) VARDATA(t), docsize, 1))
|
|
{
|
|
/*
|
|
* elog(WARNING, "Parse error at line %d:%s",
|
|
* XML_GetCurrentLineNumber(p),
|
|
* XML_ErrorString(XML_GetErrorCode(p)));
|
|
*/
|
|
XML_ParserFree(p);
|
|
PG_RETURN_BOOL(false);
|
|
}
|
|
|
|
XML_ParserFree(p);
|
|
PG_RETURN_BOOL(true);
|
|
}
|
|
|
|
/* XPath handling functions */
|
|
|
|
/* XPath support here is for a very skeletal kind of XPath!
|
|
It was easy to program though... */
|
|
|
|
/* This first is the core function that builds a result set. The
|
|
actual functions called by the user manipulate that result set
|
|
in various ways.
|
|
*/
|
|
|
|
static XPath_Results *
|
|
build_xpath_results(text *doc, text *pathstr)
|
|
{
|
|
XPath_Results *xpr;
|
|
char *res;
|
|
pgxml_udata *udata;
|
|
XML_Parser p;
|
|
int32 docsize;
|
|
|
|
xpr = (XPath_Results *) palloc((sizeof(XPath_Results)));
|
|
memset((void *) xpr, 0, sizeof(XPath_Results));
|
|
xpr->rescount = 0;
|
|
|
|
docsize = VARSIZE(doc) - VARHDRSZ;
|
|
|
|
/* res isn't going to be the real return type, it is just a buffer */
|
|
|
|
res = (char *) palloc(docsize);
|
|
memset((void *) res, 0, docsize);
|
|
|
|
xpr->resbuf = res;
|
|
|
|
udata = (pgxml_udata *) palloc((sizeof(pgxml_udata)));
|
|
memset((void *) udata, 0, sizeof(pgxml_udata));
|
|
|
|
udata->currentpath[0] = '\0';
|
|
udata->textgrab = 0;
|
|
|
|
udata->path = (char *) palloc(VARSIZE(pathstr));
|
|
memcpy(udata->path, VARDATA(pathstr), VARSIZE(pathstr) - VARHDRSZ);
|
|
|
|
udata->path[VARSIZE(pathstr) - VARHDRSZ] = '\0';
|
|
|
|
udata->resptr = res;
|
|
udata->reslen = 0;
|
|
|
|
udata->xpres = xpr;
|
|
|
|
/* Now fire up the parser */
|
|
pgxml_mhs_init();
|
|
|
|
p = XML_ParserCreate_MM(NULL, &mhs, NULL);
|
|
if (!p)
|
|
{
|
|
elog(ERROR, "pgxml: Could not create expat parser");
|
|
pfree(xpr);
|
|
pfree(udata->path);
|
|
pfree(udata);
|
|
pfree(res);
|
|
return NULL;
|
|
}
|
|
XML_SetUserData(p, (void *) udata);
|
|
|
|
/* Set the handlers */
|
|
|
|
XML_SetElementHandler(p, pgxml_starthandler, pgxml_endhandler);
|
|
XML_SetCharacterDataHandler(p, pgxml_charhandler);
|
|
|
|
if (!XML_Parse(p, (char *) VARDATA(doc), docsize, 1))
|
|
{
|
|
/*
|
|
* elog(WARNING, "Parse error at line %d:%s",
|
|
* XML_GetCurrentLineNumber(p),
|
|
* XML_ErrorString(XML_GetErrorCode(p)));
|
|
*/
|
|
XML_ParserFree(p);
|
|
pfree(xpr);
|
|
pfree(udata->path);
|
|
pfree(udata);
|
|
|
|
return NULL;
|
|
}
|
|
|
|
pfree(udata->path);
|
|
pfree(udata);
|
|
XML_ParserFree(p);
|
|
return xpr;
|
|
}
|
|
|
|
|
|
PG_FUNCTION_INFO_V1(pgxml_xpath);
|
|
|
|
Datum
|
|
pgxml_xpath(PG_FUNCTION_ARGS)
|
|
{
|
|
/* called as pgxml_xpath(document,pathstr, index) for the moment */
|
|
|
|
XPath_Results *xpresults;
|
|
text *restext;
|
|
|
|
text *t = PG_GETARG_TEXT_P(0); /* document buffer */
|
|
text *t2 = PG_GETARG_TEXT_P(1);
|
|
int32 ind = PG_GETARG_INT32(2) - 1;
|
|
|
|
xpresults = build_xpath_results(t, t2);
|
|
|
|
/*
|
|
* This needs to be changed depending on the mechanism for returning
|
|
* our set of results.
|
|
*/
|
|
|
|
if (xpresults == NULL) /* parse error (not WF or parser failure) */
|
|
PG_RETURN_NULL();
|
|
|
|
if (ind >= (xpresults->rescount))
|
|
PG_RETURN_NULL();
|
|
|
|
restext = (text *) palloc(xpresults->reslens[ind] + VARHDRSZ);
|
|
memcpy(VARDATA(restext), xpresults->results[ind], xpresults->reslens[ind]);
|
|
|
|
VARATT_SIZEP(restext) = xpresults->reslens[ind] + VARHDRSZ;
|
|
|
|
pfree(xpresults->resbuf);
|
|
pfree(xpresults);
|
|
|
|
PG_RETURN_TEXT_P(restext);
|
|
}
|
|
|
|
|
|
static void
|
|
pgxml_pathcompare(void *userData)
|
|
{
|
|
char *matchpos;
|
|
|
|
matchpos = strstr(UD->currentpath, UD->path);
|
|
|
|
if (matchpos == NULL)
|
|
{ /* Should we have more logic here ? */
|
|
if (UD->textgrab)
|
|
{
|
|
UD->textgrab = 0;
|
|
pgxml_finalisegrabbedtext(userData);
|
|
}
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* OK, we have a match of some sort. Now we need to check that our
|
|
* match is anchored to the *end* of the string AND that it is
|
|
* immediately preceded by a '/'
|
|
*/
|
|
|
|
/*
|
|
* This test wouldn't work if strlen (UD->path) overran the length of
|
|
* the currentpath, but that's not possible because we got a match!
|
|
*/
|
|
|
|
if ((matchpos + strlen(UD->path))[0] == '\0')
|
|
{
|
|
if ((UD->path)[0] == '/')
|
|
{
|
|
if (matchpos == UD->currentpath)
|
|
UD->textgrab = 1;
|
|
}
|
|
else
|
|
{
|
|
if ((matchpos - 1)[0] == '/')
|
|
UD->textgrab = 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
pgxml_starthandler(void *userData, const XML_Char * name,
|
|
const XML_Char ** atts)
|
|
{
|
|
|
|
char sepstr[] = "/";
|
|
|
|
if ((strlen(name) + strlen(UD->currentpath)) > MAXPATHLENGTH - 2)
|
|
elog(WARNING, "Path too long");
|
|
else
|
|
{
|
|
strncat(UD->currentpath, sepstr, 1);
|
|
strcat(UD->currentpath, name);
|
|
}
|
|
if (UD->textgrab)
|
|
{
|
|
/*
|
|
* Depending on user preference, should we "reconstitute" the
|
|
* element into the result text?
|
|
*/
|
|
}
|
|
else
|
|
pgxml_pathcompare(userData);
|
|
}
|
|
|
|
static void
|
|
pgxml_endhandler(void *userData, const XML_Char * name)
|
|
{
|
|
/*
|
|
* Start by removing the current element off the end of the
|
|
* currentpath
|
|
*/
|
|
|
|
char *sepptr;
|
|
|
|
sepptr = strrchr(UD->currentpath, '/');
|
|
if (sepptr == NULL)
|
|
{
|
|
elog(ERROR, "There's a problem...");
|
|
sepptr = UD->currentpath;
|
|
}
|
|
if (strcmp(name, sepptr + 1) != 0)
|
|
{
|
|
elog(WARNING, "Wanted [%s], got [%s]", sepptr, name);
|
|
/* unmatched entry, so do nothing */
|
|
}
|
|
else
|
|
{
|
|
sepptr[0] = '\0'; /* Chop that element off the end */
|
|
}
|
|
|
|
if (UD->textgrab)
|
|
pgxml_pathcompare(userData);
|
|
|
|
}
|
|
|
|
static void
|
|
pgxml_charhandler(void *userData, const XML_Char * s, int len)
|
|
{
|
|
if (UD->textgrab)
|
|
{
|
|
if (len > 0)
|
|
{
|
|
memcpy(UD->resptr, s, len);
|
|
UD->resptr += len;
|
|
UD->reslen += len;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Should I be using PG list types here? */
|
|
|
|
static void
|
|
pgxml_finalisegrabbedtext(void *userData)
|
|
{
|
|
/* In res/reslen, we have a single result. */
|
|
UD->xpres->results[UD->xpres->rescount] = UD->resptr - UD->reslen;
|
|
UD->xpres->reslens[UD->xpres->rescount] = UD->reslen;
|
|
UD->reslen = 0;
|
|
UD->xpres->rescount++;
|
|
|
|
/*
|
|
* This effectively concatenates all the results together but we do
|
|
* know where one ends and the next begins
|
|
*/
|
|
}
|