mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-24 18:55:04 +08:00
e3dd7c06e6
As noted by Thomas Munro, CLDR 36 has added SOUND RECORDING COPYRIGHT (U+2117), and we use CLDR 41, so this can be removed from the set of special cases. The set of regression tests is expanded to cover the degree signs, which are two of the special cases, and a fancy case with U+210C in Latin-ASCII.xml that we discovered when diving into what could be done for Cyrillic characters (this last part is material for a future patch, not tackled yet). While at it, some of the assertions of generate_unaccent_rules.py are expanded to report the codepoint on which a failure is found, something useful for debugging. Extracted from a larger patch by the same author. Author: Przemysław Sztoch Discussion: https://postgr.es/m/8478da0d-3b61-d24f-80b4-ce2f5e971c60@sztoch.pl
35 lines
1.1 KiB
SQL
35 lines
1.1 KiB
SQL
-- Install the extension whose functions are called by the rest of this script.
CREATE EXTENSION unaccent;
|
||
|
||
-- must have a UTF8 database; the query below reports the actual encoding
|
||
SELECT getdatabaseencoding();
|
||
|
||
-- Presumably set so the non-ASCII literals below are interpreted as UTF8.
SET client_encoding TO 'UTF8';
|
||
|
||
-- One-argument form: unaccent() is called with the input text only.
SELECT unaccent('foobar'); -- plain ASCII input
|
||
SELECT unaccent('ёлка'); -- lowercase Cyrillic input
|
||
SELECT unaccent('ЁЖИК'); -- uppercase Cyrillic input
|
||
SELECT unaccent('˃˖˗˜'); -- characters from the Spacing Modifier Letters range
|
||
SELECT unaccent('À'); -- Remove combining diacritical 0x0300
|
||
SELECT unaccent('℃℉'); -- degree Celsius and degree Fahrenheit signs
|
||
SELECT unaccent('℗'); -- sound recording copyright (U+2117)
|
||
|
||
-- Two-argument form: the same inputs as the one-argument calls, with
-- 'unaccent' passed explicitly as the first argument.
SELECT unaccent('unaccent', 'foobar');
|
||
SELECT unaccent('unaccent', 'ёлка');
|
||
SELECT unaccent('unaccent', 'ЁЖИК');
|
||
SELECT unaccent('unaccent', '˃˖˗˜');
|
||
SELECT unaccent('unaccent', 'À');
|
||
SELECT unaccent('unaccent', '℃℉'); -- degree signs
|
||
SELECT unaccent('unaccent', '℗'); -- sound recording copyright
|
||
|
||
-- The same inputs again, this time through ts_lexize() with 'unaccent'
-- as its first argument.
SELECT ts_lexize('unaccent', 'foobar');
|
||
SELECT ts_lexize('unaccent', 'ёлка');
|
||
SELECT ts_lexize('unaccent', 'ЁЖИК');
|
||
SELECT ts_lexize('unaccent', '˃˖˗˜');
|
||
SELECT ts_lexize('unaccent', 'À');
|
||
SELECT ts_lexize('unaccent', '℃℉'); -- degree signs
|
||
SELECT ts_lexize('unaccent', '℗'); -- sound recording copyright
|
||
|
||
-- Controversial case. Black-Letter Capital H (U+210C) is translated by
|
||
-- Latin-ASCII.xml as 'x', but it should be 'H'.
|
||
SELECT unaccent('ℌ');
|