Add Unicode support in PL/Python

PL/Python now accepts Unicode objects where it previously only accepted string
objects (for example, as a return value).  Unicode objects are converted to the
PostgreSQL server encoding as necessary.

This change is also necessary for future Python 3 support, because Python 3
treats all strings as Unicode objects.

Since this removes the error conditions that the plpython_unicode test file
tested for, the alternative result files are no longer necessary.
This commit is contained in:
Peter Eisentraut 2009-09-12 22:13:12 +00:00
parent 9bb342811b
commit 4ab6ebf3f4
8 changed files with 175 additions and 143 deletions

View File

@ -2,7 +2,3 @@ Guide to alternative expected files:
plpython_error_2.out Python 2.2, 2.3, 2.4
plpython_error.out Python 2.5, 2.6
plpython_unicode_2.out Python 2.2
plpython_unicode_3.out Python 2.3, 2.4
plpython_unicode.out Python 2.5, 2.6

View File

@ -342,6 +342,19 @@ ERROR: unexpected return value from trigger procedure
DETAIL: Expected None, "OK", "SKIP", or "MODIFY".
CONTEXT: PL/Python function "stupid3"
DROP TRIGGER stupid_trigger3 ON trigger_test;
-- Unicode variant
CREATE FUNCTION stupid3u() RETURNS trigger
AS $$
return u"foo"
$$ LANGUAGE plpythonu;
CREATE TRIGGER stupid_trigger3
BEFORE UPDATE ON trigger_test
FOR EACH ROW EXECUTE PROCEDURE stupid3u();
UPDATE trigger_test SET v = 'null' WHERE i = 0;
ERROR: unexpected return value from trigger procedure
DETAIL: Expected None, "OK", "SKIP", or "MODIFY".
CONTEXT: PL/Python function "stupid3u"
DROP TRIGGER stupid_trigger3 ON trigger_test;
-- deleting the TD dictionary
CREATE FUNCTION stupid4() RETURNS trigger
AS $$
@ -398,6 +411,20 @@ ERROR: key "a" found in TD["new"] does not exist as a column in the triggering
CONTEXT: while modifying trigger row
PL/Python function "stupid7"
DROP TRIGGER stupid_trigger7 ON trigger_test;
-- Unicode variant
CREATE FUNCTION stupid7u() RETURNS trigger
AS $$
TD["new"] = {u'a': 'foo', u'b': 'bar'}
return "MODIFY"
$$ LANGUAGE plpythonu;
CREATE TRIGGER stupid_trigger7
BEFORE UPDATE ON trigger_test
FOR EACH ROW EXECUTE PROCEDURE stupid7u();
UPDATE trigger_test SET v = 'null' WHERE i = 0;
ERROR: key "a" found in TD["new"] does not exist as a column in the triggering row
CONTEXT: while modifying trigger row
PL/Python function "stupid7u"
DROP TRIGGER stupid_trigger7 ON trigger_test;
-- calling a trigger function directly
SELECT stupid7();
ERROR: trigger functions can only be called as triggers

View File

@ -4,42 +4,47 @@
CREATE TABLE unicode_test (
testvalue text NOT NULL
);
CREATE FUNCTION unicode_return_error() RETURNS text AS E'
CREATE FUNCTION unicode_return() RETURNS text AS E'
return u"\\x80"
' LANGUAGE plpythonu;
CREATE FUNCTION unicode_trigger_error() RETURNS trigger AS E'
CREATE FUNCTION unicode_trigger() RETURNS trigger AS E'
TD["new"]["testvalue"] = u"\\x80"
return "MODIFY"
' LANGUAGE plpythonu;
CREATE TRIGGER unicode_test_bi BEFORE INSERT ON unicode_test
FOR EACH ROW EXECUTE PROCEDURE unicode_trigger_error();
CREATE FUNCTION unicode_plan_error1() RETURNS text AS E'
FOR EACH ROW EXECUTE PROCEDURE unicode_trigger();
CREATE FUNCTION unicode_plan1() RETURNS text AS E'
plan = plpy.prepare("SELECT $1 AS testvalue", ["text"])
rv = plpy.execute(plan, [u"\\x80"], 1)
return rv[0]["testvalue"]
' LANGUAGE plpythonu;
CREATE FUNCTION unicode_plan_error2() RETURNS text AS E'
plan = plpy.prepare("SELECT $1 AS testvalue1, $2 AS testvalue2", ["text", "text"])
rv = plpy.execute(plan, u"\\x80", 1)
return rv[0]["testvalue1"]
CREATE FUNCTION unicode_plan2() RETURNS text AS E'
plan = plpy.prepare("SELECT $1 || $2 AS testvalue", ["text", u"text"])
rv = plpy.execute(plan, ["foo", "bar"], 1)
return rv[0]["testvalue"]
' LANGUAGE plpythonu;
SELECT unicode_return_error();
ERROR: PL/Python: could not create string representation of Python object
DETAIL: <type 'exceptions.UnicodeEncodeError'>: 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in range(128)
CONTEXT: while creating return value
PL/Python function "unicode_return_error"
SELECT unicode_return();
unicode_return
----------------
\u0080
(1 row)
INSERT INTO unicode_test (testvalue) VALUES ('test');
ERROR: PL/Python: could not create string representation of Python object
DETAIL: <type 'exceptions.UnicodeEncodeError'>: 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in range(128)
CONTEXT: while modifying trigger row
PL/Python function "unicode_trigger_error"
SELECT unicode_plan_error1();
WARNING: PL/Python: <class 'plpy.Error'>: unrecognized error in PLy_spi_execute_plan
CONTEXT: PL/Python function "unicode_plan_error1"
ERROR: PL/Python: could not execute plan
DETAIL: <type 'exceptions.UnicodeEncodeError'>: 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in range(128)
CONTEXT: PL/Python function "unicode_plan_error1"
SELECT unicode_plan_error2();
ERROR: PL/Python: could not execute plan
DETAIL: <type 'exceptions.UnicodeEncodeError'>: 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in range(128)
CONTEXT: PL/Python function "unicode_plan_error2"
SELECT * FROM unicode_test;
testvalue
-----------
\u0080
(1 row)
SELECT unicode_plan1();
unicode_plan1
---------------
\u0080
(1 row)
SELECT unicode_plan2();
unicode_plan2
---------------
foobar
(1 row)

View File

@ -1,45 +0,0 @@
--
-- Unicode handling
--
CREATE TABLE unicode_test (
testvalue text NOT NULL
);
CREATE FUNCTION unicode_return_error() RETURNS text AS E'
return u"\\x80"
' LANGUAGE plpythonu;
CREATE FUNCTION unicode_trigger_error() RETURNS trigger AS E'
TD["new"]["testvalue"] = u"\\x80"
return "MODIFY"
' LANGUAGE plpythonu;
CREATE TRIGGER unicode_test_bi BEFORE INSERT ON unicode_test
FOR EACH ROW EXECUTE PROCEDURE unicode_trigger_error();
CREATE FUNCTION unicode_plan_error1() RETURNS text AS E'
plan = plpy.prepare("SELECT $1 AS testvalue", ["text"])
rv = plpy.execute(plan, [u"\\x80"], 1)
return rv[0]["testvalue"]
' LANGUAGE plpythonu;
CREATE FUNCTION unicode_plan_error2() RETURNS text AS E'
plan = plpy.prepare("SELECT $1 AS testvalue1, $2 AS testvalue2", ["text", "text"])
rv = plpy.execute(plan, u"\\x80", 1)
return rv[0]["testvalue1"]
' LANGUAGE plpythonu;
SELECT unicode_return_error();
ERROR: PL/Python: could not create string representation of Python object
DETAIL: exceptions.UnicodeError: ASCII encoding error: ordinal not in range(128)
CONTEXT: while creating return value
PL/Python function "unicode_return_error"
INSERT INTO unicode_test (testvalue) VALUES ('test');
ERROR: PL/Python: could not create string representation of Python object
DETAIL: exceptions.UnicodeError: ASCII encoding error: ordinal not in range(128)
CONTEXT: while modifying trigger row
PL/Python function "unicode_trigger_error"
SELECT unicode_plan_error1();
WARNING: PL/Python: plpy.Error: unrecognized error in PLy_spi_execute_plan
CONTEXT: PL/Python function "unicode_plan_error1"
ERROR: PL/Python: could not execute plan
DETAIL: exceptions.UnicodeError: ASCII encoding error: ordinal not in range(128)
CONTEXT: PL/Python function "unicode_plan_error1"
SELECT unicode_plan_error2();
ERROR: PL/Python: could not execute plan
DETAIL: exceptions.UnicodeError: ASCII encoding error: ordinal not in range(128)
CONTEXT: PL/Python function "unicode_plan_error2"

View File

@ -1,45 +0,0 @@
--
-- Unicode handling
--
CREATE TABLE unicode_test (
testvalue text NOT NULL
);
CREATE FUNCTION unicode_return_error() RETURNS text AS E'
return u"\\x80"
' LANGUAGE plpythonu;
CREATE FUNCTION unicode_trigger_error() RETURNS trigger AS E'
TD["new"]["testvalue"] = u"\\x80"
return "MODIFY"
' LANGUAGE plpythonu;
CREATE TRIGGER unicode_test_bi BEFORE INSERT ON unicode_test
FOR EACH ROW EXECUTE PROCEDURE unicode_trigger_error();
CREATE FUNCTION unicode_plan_error1() RETURNS text AS E'
plan = plpy.prepare("SELECT $1 AS testvalue", ["text"])
rv = plpy.execute(plan, [u"\\x80"], 1)
return rv[0]["testvalue"]
' LANGUAGE plpythonu;
CREATE FUNCTION unicode_plan_error2() RETURNS text AS E'
plan = plpy.prepare("SELECT $1 AS testvalue1, $2 AS testvalue2", ["text", "text"])
rv = plpy.execute(plan, u"\\x80", 1)
return rv[0]["testvalue1"]
' LANGUAGE plpythonu;
SELECT unicode_return_error();
ERROR: PL/Python: could not create string representation of Python object
DETAIL: exceptions.UnicodeEncodeError: 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in range(128)
CONTEXT: while creating return value
PL/Python function "unicode_return_error"
INSERT INTO unicode_test (testvalue) VALUES ('test');
ERROR: PL/Python: could not create string representation of Python object
DETAIL: exceptions.UnicodeEncodeError: 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in range(128)
CONTEXT: while modifying trigger row
PL/Python function "unicode_trigger_error"
SELECT unicode_plan_error1();
WARNING: PL/Python: plpy.Error: unrecognized error in PLy_spi_execute_plan
CONTEXT: PL/Python function "unicode_plan_error1"
ERROR: PL/Python: could not execute plan
DETAIL: exceptions.UnicodeEncodeError: 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in range(128)
CONTEXT: PL/Python function "unicode_plan_error1"
SELECT unicode_plan_error2();
ERROR: PL/Python: could not execute plan
DETAIL: exceptions.UnicodeEncodeError: 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in range(128)
CONTEXT: PL/Python function "unicode_plan_error2"

View File

@ -1,7 +1,7 @@
/**********************************************************************
* plpython.c - python as a procedural language for PostgreSQL
*
* $PostgreSQL: pgsql/src/pl/plpython/plpython.c,v 1.128 2009/09/09 19:00:09 petere Exp $
* $PostgreSQL: pgsql/src/pl/plpython/plpython.c,v 1.129 2009/09/12 22:13:12 petere Exp $
*
*********************************************************************
*/
@ -54,6 +54,7 @@ typedef int Py_ssize_t;
#include "executor/spi.h"
#include "funcapi.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "parser/parse_type.h"
@ -238,6 +239,9 @@ static void *PLy_malloc0(size_t);
static char *PLy_strdup(const char *);
static void PLy_free(void *);
static PyObject*PLyUnicode_Str(PyObject *unicode);
static char *PLyUnicode_AsString(PyObject *unicode);
/* sub handlers for functions and triggers */
static Datum PLy_function_handler(FunctionCallInfo fcinfo, PLyProcedure *);
static HeapTuple PLy_trigger_handler(FunctionCallInfo fcinfo, PLyProcedure *);
@ -474,13 +478,19 @@ PLy_trigger_handler(FunctionCallInfo fcinfo, PLyProcedure *proc)
{
char *srv;
if (!PyString_Check(plrv))
if (PyString_Check(plrv))
srv = PyString_AsString(plrv);
else if (PyUnicode_Check(plrv))
srv = PLyUnicode_AsString(plrv);
else
{
ereport(ERROR,
(errcode(ERRCODE_DATA_EXCEPTION),
errmsg("unexpected return value from trigger procedure"),
errdetail("Expected None or a string.")));
srv = NULL; /* keep compiler quiet */
}
srv = PyString_AsString(plrv);
if (pg_strcasecmp(srv, "SKIP") == 0)
rv = NULL;
else if (pg_strcasecmp(srv, "MODIFY") == 0)
@ -572,15 +582,24 @@ PLy_modify_tuple(PLyProcedure *proc, PyObject *pltd, TriggerData *tdata,
for (i = 0; i < natts; i++)
{
char *plattstr;
platt = PyList_GetItem(plkeys, i);
if (!PyString_Check(platt))
if (PyString_Check(platt))
plattstr = PyString_AsString(platt);
else if (PyUnicode_Check(platt))
plattstr = PLyUnicode_AsString(platt);
else
{
ereport(ERROR,
(errmsg("TD[\"new\"] dictionary key at ordinal position %d is not a string", i)));
attn = SPI_fnumber(tupdesc, PyString_AsString(platt));
plattstr = NULL; /* keep compiler quiet */
}
attn = SPI_fnumber(tupdesc, plattstr);
if (attn == SPI_ERROR_NOATTRIBUTE)
ereport(ERROR,
(errmsg("key \"%s\" found in TD[\"new\"] does not exist as a column in the triggering row",
PyString_AsString(platt))));
plattstr)));
atti = attn - 1;
plval = PyDict_GetItem(plntup, platt);
@ -1942,6 +1961,9 @@ PLyObject_ToDatum(PLyTypeInfo *info,
Assert(plrv != Py_None);
if (PyUnicode_Check(plrv))
plrv_so = PLyUnicode_Str(plrv);
else
plrv_so = PyObject_Str(plrv);
if (!plrv_so)
PLy_elog(ERROR, "could not create string representation of Python object");
@ -2562,10 +2584,16 @@ PLy_spi_prepare(PyObject *self, PyObject *args)
Form_pg_type typeStruct;
optr = PySequence_GetItem(list, i);
if (!PyString_Check(optr))
if (PyString_Check(optr))
sptr = PyString_AsString(optr);
else if (PyUnicode_Check(optr))
sptr = PLyUnicode_AsString(optr);
else
{
ereport(ERROR,
(errmsg("plpy.prepare: type name at ordinal position %d is not a string", i)));
sptr = PyString_AsString(optr);
sptr = NULL; /* keep compiler quiet */
}
/********************************************************
* Resolve argument type names and then look them up by
@ -2670,7 +2698,7 @@ PLy_spi_execute_plan(PyObject *ob, PyObject *list, long limit)
if (list != NULL)
{
if (!PySequence_Check(list) || PyString_Check(list))
if (!PySequence_Check(list) || PyString_Check(list) || PyUnicode_Check(list))
{
PLy_exception_set(PLy_exc_spi_error, "plpy.execute takes a sequence as its second argument");
return NULL;
@ -2714,6 +2742,9 @@ PLy_spi_execute_plan(PyObject *ob, PyObject *list, long limit)
elem = PySequence_GetItem(list, j);
if (elem != Py_None)
{
if (PyUnicode_Check(elem))
so = PLyUnicode_Str(elem);
else
so = PyObject_Str(elem);
if (!so)
PLy_elog(ERROR, "could not execute plan");
@ -3303,3 +3334,32 @@ PLy_free(void *ptr)
{
free(ptr);
}
/*
 * Convert a Python unicode object to a Python string object in
 * PostgreSQL server encoding.  Reference ownership is passed to the
 * caller.
 *
 * Returns NULL (with a Python exception set) if the object cannot be
 * encoded, e.g. because it contains a character that has no
 * representation in the server encoding.  Callers must check for NULL.
 */
static PyObject *
PLyUnicode_Str(PyObject *unicode)
{
	const char *serverenc;

	/*
	 * Most PostgreSQL encoding names are also accepted by Python as codec
	 * names or aliases, but a few are not.  Translate the known exceptions
	 * explicitly; otherwise Python fails with a LookupError before it even
	 * looks at the data.
	 */
	switch (GetDatabaseEncoding())
	{
		case PG_SQL_ASCII:
			/*
			 * SQL_ASCII means "no declared encoding", so there is no
			 * correct way to encode non-ASCII characters.  Use Python's
			 * strict "ascii" codec so that pure-ASCII data works and
			 * anything else raises an error instead of being guessed at.
			 */
			serverenc = "ascii";
			break;
		case PG_WIN1250:
			serverenc = "cp1250";
			break;
		case PG_WIN1251:
			serverenc = "cp1251";
			break;
		case PG_WIN1252:
			serverenc = "cp1252";
			break;
		case PG_WIN1253:
			serverenc = "cp1253";
			break;
		case PG_WIN1254:
			serverenc = "cp1254";
			break;
		case PG_WIN1255:
			serverenc = "cp1255";
			break;
		case PG_WIN1256:
			serverenc = "cp1256";
			break;
		case PG_WIN1257:
			serverenc = "cp1257";
			break;
		case PG_WIN1258:
			serverenc = "cp1258";
			break;
		case PG_WIN866:
			serverenc = "cp866";
			break;
		case PG_WIN874:
			serverenc = "cp874";
			break;
		default:
			/* Other encoding names are passed to Python unchanged. */
			serverenc = GetDatabaseEncodingName();
			break;
	}

	return PyUnicode_AsEncodedString(unicode, serverenc, "strict");
}
/*
 * Convert a Python unicode object to a C string in PostgreSQL server
 * encoding.  No Python object reference is passed out of this
 * function.
 *
 * The result is a palloc'd copy allocated in the current memory
 * context, so it remains valid after the temporary Python string
 * object has been released.  Raises an ERROR if the object cannot be
 * converted to the server encoding.
 */
static char *
PLyUnicode_AsString(PyObject *unicode)
{
	PyObject   *o = PLyUnicode_Str(unicode);
	char	   *rv;

	/*
	 * Encoding can fail (unencodable character, unknown codec); passing
	 * NULL on to PyString_AsString() would crash, so report it here.
	 */
	if (o == NULL)
		PLy_elog(ERROR, "could not convert Python Unicode object to PostgreSQL server encoding");

	/*
	 * PyString_AsString() returns a pointer into the string object's own
	 * buffer.  Copy it before dropping our reference, since that reference
	 * is the only one and the buffer is freed along with the object.
	 */
	rv = pstrdup(PyString_AsString(o));
	Py_DECREF(o);

	return rv;
}

View File

@ -159,6 +159,22 @@ UPDATE trigger_test SET v = 'null' WHERE i = 0;
DROP TRIGGER stupid_trigger3 ON trigger_test;
-- Unicode variant
CREATE FUNCTION stupid3u() RETURNS trigger
AS $$
return u"foo"
$$ LANGUAGE plpythonu;
CREATE TRIGGER stupid_trigger3
BEFORE UPDATE ON trigger_test
FOR EACH ROW EXECUTE PROCEDURE stupid3u();
UPDATE trigger_test SET v = 'null' WHERE i = 0;
DROP TRIGGER stupid_trigger3 ON trigger_test;
-- deleting the TD dictionary
CREATE FUNCTION stupid4() RETURNS trigger
@ -227,6 +243,23 @@ UPDATE trigger_test SET v = 'null' WHERE i = 0;
DROP TRIGGER stupid_trigger7 ON trigger_test;
-- Unicode variant
CREATE FUNCTION stupid7u() RETURNS trigger
AS $$
TD["new"] = {u'a': 'foo', u'b': 'bar'}
return "MODIFY"
$$ LANGUAGE plpythonu;
CREATE TRIGGER stupid_trigger7
BEFORE UPDATE ON trigger_test
FOR EACH ROW EXECUTE PROCEDURE stupid7u();
UPDATE trigger_test SET v = 'null' WHERE i = 0;
DROP TRIGGER stupid_trigger7 ON trigger_test;
-- calling a trigger function directly
SELECT stupid7();

View File

@ -6,32 +6,33 @@ CREATE TABLE unicode_test (
testvalue text NOT NULL
);
CREATE FUNCTION unicode_return_error() RETURNS text AS E'
CREATE FUNCTION unicode_return() RETURNS text AS E'
return u"\\x80"
' LANGUAGE plpythonu;
CREATE FUNCTION unicode_trigger_error() RETURNS trigger AS E'
CREATE FUNCTION unicode_trigger() RETURNS trigger AS E'
TD["new"]["testvalue"] = u"\\x80"
return "MODIFY"
' LANGUAGE plpythonu;
CREATE TRIGGER unicode_test_bi BEFORE INSERT ON unicode_test
FOR EACH ROW EXECUTE PROCEDURE unicode_trigger_error();
FOR EACH ROW EXECUTE PROCEDURE unicode_trigger();
CREATE FUNCTION unicode_plan_error1() RETURNS text AS E'
CREATE FUNCTION unicode_plan1() RETURNS text AS E'
plan = plpy.prepare("SELECT $1 AS testvalue", ["text"])
rv = plpy.execute(plan, [u"\\x80"], 1)
return rv[0]["testvalue"]
' LANGUAGE plpythonu;
CREATE FUNCTION unicode_plan_error2() RETURNS text AS E'
plan = plpy.prepare("SELECT $1 AS testvalue1, $2 AS testvalue2", ["text", "text"])
rv = plpy.execute(plan, u"\\x80", 1)
return rv[0]["testvalue1"]
CREATE FUNCTION unicode_plan2() RETURNS text AS E'
plan = plpy.prepare("SELECT $1 || $2 AS testvalue", ["text", u"text"])
rv = plpy.execute(plan, ["foo", "bar"], 1)
return rv[0]["testvalue"]
' LANGUAGE plpythonu;
SELECT unicode_return_error();
SELECT unicode_return();
INSERT INTO unicode_test (testvalue) VALUES ('test');
SELECT unicode_plan_error1();
SELECT unicode_plan_error2();
SELECT * FROM unicode_test;
SELECT unicode_plan1();
SELECT unicode_plan2();