diff --git a/src/pl/plpython/expected/plpython_unicode_2.out b/src/pl/plpython/expected/plpython_unicode_2.out deleted file mode 100644 index d6bd823db8..0000000000 --- a/src/pl/plpython/expected/plpython_unicode_2.out +++ /dev/null @@ -1,52 +0,0 @@ --- --- Unicode handling --- -CREATE TABLE unicode_test ( - testvalue text NOT NULL -); -CREATE FUNCTION unicode_return() RETURNS text AS E' -return u"\\x80" -' LANGUAGE plpythonu; -CREATE FUNCTION unicode_trigger() RETURNS trigger AS E' -TD["new"]["testvalue"] = u"\\x80" -return "MODIFY" -' LANGUAGE plpythonu; -CREATE TRIGGER unicode_test_bi BEFORE INSERT ON unicode_test - FOR EACH ROW EXECUTE PROCEDURE unicode_trigger(); -CREATE FUNCTION unicode_plan1() RETURNS text AS E' -plan = plpy.prepare("SELECT $1 AS testvalue", ["text"]) -rv = plpy.execute(plan, [u"\\x80"], 1) -return rv[0]["testvalue"] -' LANGUAGE plpythonu; -CREATE FUNCTION unicode_plan2() RETURNS text AS E' -plan = plpy.prepare("SELECT $1 || $2 AS testvalue", ["text", u"text"]) -rv = plpy.execute(plan, ["foo", "bar"], 1) -return rv[0]["testvalue"] -' LANGUAGE plpythonu; -SELECT unicode_return(); -ERROR: PL/Python: could not convert Python Unicode object to PostgreSQL server encoding -DETAIL: UnicodeError: ASCII encoding error: ordinal not in range(128) -CONTEXT: while creating return value -PL/Python function "unicode_return" -INSERT INTO unicode_test (testvalue) VALUES ('test'); -ERROR: PL/Python: could not convert Python Unicode object to PostgreSQL server encoding -DETAIL: UnicodeError: ASCII encoding error: ordinal not in range(128) -CONTEXT: while modifying trigger row -PL/Python function "unicode_trigger" -SELECT * FROM unicode_test; - testvalue ------------ -(0 rows) - -SELECT unicode_plan1(); -WARNING: PL/Python: plpy.Error: unrecognized error in PLy_spi_execute_plan -CONTEXT: PL/Python function "unicode_plan1" -ERROR: PL/Python: could not convert Python Unicode object to PostgreSQL server encoding -DETAIL: UnicodeError: ASCII encoding error: ordinal not in range(128) -CONTEXT: PL/Python function "unicode_plan1" -SELECT unicode_plan2(); - unicode_plan2 ---------------- - foobar -(1 row) - diff --git a/src/pl/plpython/expected/plpython_unicode_3.out b/src/pl/plpython/expected/plpython_unicode_3.out deleted file mode 100644 index 676845de4d..0000000000 --- a/src/pl/plpython/expected/plpython_unicode_3.out +++ /dev/null @@ -1,52 +0,0 @@ --- --- Unicode handling --- -CREATE TABLE unicode_test ( - testvalue text NOT NULL -); -CREATE FUNCTION unicode_return() RETURNS text AS E' -return u"\\x80" -' LANGUAGE plpythonu; -CREATE FUNCTION unicode_trigger() RETURNS trigger AS E' -TD["new"]["testvalue"] = u"\\x80" -return "MODIFY" -' LANGUAGE plpythonu; -CREATE TRIGGER unicode_test_bi BEFORE INSERT ON unicode_test - FOR EACH ROW EXECUTE PROCEDURE unicode_trigger(); -CREATE FUNCTION unicode_plan1() RETURNS text AS E' -plan = plpy.prepare("SELECT $1 AS testvalue", ["text"]) -rv = plpy.execute(plan, [u"\\x80"], 1) -return rv[0]["testvalue"] -' LANGUAGE plpythonu; -CREATE FUNCTION unicode_plan2() RETURNS text AS E' -plan = plpy.prepare("SELECT $1 || $2 AS testvalue", ["text", u"text"]) -rv = plpy.execute(plan, ["foo", "bar"], 1) -return rv[0]["testvalue"] -' LANGUAGE plpythonu; -SELECT unicode_return(); -ERROR: PL/Python: could not convert Python Unicode object to PostgreSQL server encoding -DETAIL: UnicodeEncodeError: 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in range(128) -CONTEXT: while creating return value -PL/Python function "unicode_return" -INSERT INTO unicode_test (testvalue) VALUES ('test'); -ERROR: PL/Python: could not convert Python Unicode object to PostgreSQL server encoding -DETAIL: UnicodeEncodeError: 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in range(128) -CONTEXT: while modifying trigger row -PL/Python function "unicode_trigger" -SELECT * FROM unicode_test; - testvalue ------------ -(0 rows) - -SELECT unicode_plan1(); -WARNING: PL/Python: plpy.Error: unrecognized error in PLy_spi_execute_plan -CONTEXT: PL/Python function "unicode_plan1" -ERROR: PL/Python: could not convert Python Unicode object to PostgreSQL server encoding -DETAIL: UnicodeEncodeError: 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in range(128) -CONTEXT: PL/Python function "unicode_plan1" -SELECT unicode_plan2(); - unicode_plan2 ---------------- - foobar -(1 row) - diff --git a/src/pl/plpython/plpython.c b/src/pl/plpython/plpython.c index 85829a0d3b..b748af1d35 100644 --- a/src/pl/plpython/plpython.c +++ b/src/pl/plpython/plpython.c @@ -3686,66 +3686,56 @@ PLy_free(void *ptr) static PyObject * PLyUnicode_Bytes(PyObject *unicode) { - PyObject *rv; - const char *serverenc; + PyObject *bytes, *rv; + char *utf8string, *encoded; - /* - * Map PostgreSQL encoding to a Python encoding name. - */ - switch (GetDatabaseEncoding()) - { - case PG_SQL_ASCII: - /* - * Mapping SQL_ASCII to Python's 'ascii' is a bit bogus. Python's - * 'ascii' means true 7-bit only ASCII, while PostgreSQL's - * SQL_ASCII means that anything is allowed, and the system doesn't - * try to interpret the bytes in any way. But not sure what else - * to do, and we haven't heard any complaints... - */ - serverenc = "ascii"; - break; - case PG_WIN1250: - serverenc = "cp1250"; - break; - case PG_WIN1251: - serverenc = "cp1251"; - break; - case PG_WIN1252: - serverenc = "cp1252"; - break; - case PG_WIN1253: - serverenc = "cp1253"; - break; - case PG_WIN1254: - serverenc = "cp1254"; - break; - case PG_WIN1255: - serverenc = "cp1255"; - break; - case PG_WIN1256: - serverenc = "cp1256"; - break; - case PG_WIN1257: - serverenc = "cp1257"; - break; - case PG_WIN1258: - serverenc = "cp1258"; - break; - case PG_WIN866: - serverenc = "cp866"; - break; - case PG_WIN874: - serverenc = "cp874"; - break; - default: - /* Other encodings have the same name in Python. */ - serverenc = GetDatabaseEncodingName(); - break; + /* First encode the Python unicode object with UTF-8. */ + bytes = PyUnicode_AsUTF8String(unicode); + if (bytes == NULL) + PLy_elog(ERROR, "could not convert Python Unicode object to bytes"); + + utf8string = PyBytes_AsString(bytes); + if (utf8string == NULL) { + Py_DECREF(bytes); + PLy_elog(ERROR, "could not extract bytes from encoded string"); } - rv = PyUnicode_AsEncodedString(unicode, serverenc, "strict"); - if (rv == NULL) - PLy_elog(ERROR, "could not convert Python Unicode object to PostgreSQL server encoding"); + /* + * Then convert to server encoding if necessary. + * + * PyUnicode_AsEncodedString could be used to encode the object directly + * in the server encoding, but Python doesn't support all the encodings + * that PostgreSQL does (EUC_TW and MULE_INTERNAL). UTF-8 is used as an + * intermediary in PLyUnicode_FromString as well. + */ + if (GetDatabaseEncoding() != PG_UTF8) + { + PG_TRY(); + { + encoded = (char *) pg_do_encoding_conversion( + (unsigned char *) utf8string, + strlen(utf8string), + PG_UTF8, + GetDatabaseEncoding()); + } + PG_CATCH(); + { + Py_DECREF(bytes); + PG_RE_THROW(); + } + PG_END_TRY(); + } + else + encoded = utf8string; + + /* finally, build a bytes object in the server encoding */ + rv = PyBytes_FromStringAndSize(encoded, strlen(encoded)); + + /* if pg_do_encoding_conversion allocated memory, free it now */ + if (utf8string != encoded) + pfree(encoded); + + Py_DECREF(bytes); return rv; }