mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-21 08:29:39 +08:00
Bring SIMILAR TO and SUBSTRING into some semblance of conformance with
the SQL99 standard. (I'm not sure that the character-class features are quite right, but that can be fixed later.) Document SQL99 and POSIX regexps as being different features; provide variants of SUBSTRING for each.
This commit is contained in:
parent
ac355d558e
commit
9946b83ded
@ -1,5 +1,5 @@
|
||||
<!--
|
||||
$Header: /cvsroot/pgsql/doc/src/sgml/func.sgml,v 1.124 2002/09/21 18:32:53 petere Exp $
|
||||
$Header: /cvsroot/pgsql/doc/src/sgml/func.sgml,v 1.125 2002/09/22 17:27:23 tgl Exp $
|
||||
PostgreSQL documentation
|
||||
-->
|
||||
|
||||
@ -921,18 +921,31 @@ PostgreSQL documentation
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>substring</function>(<parameter>string</parameter> <optional>from <replaceable>pattern</replaceable></optional> <optional>for <replaceable>escape</replaceable></optional>)</entry>
|
||||
<entry><function>substring</function>(<parameter>string</parameter> from <replaceable>pattern</replaceable>)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>
|
||||
extract regular expression
|
||||
extract substring matching POSIX regular expression
|
||||
<indexterm>
|
||||
<primary>substring</primary>
|
||||
</indexterm>
|
||||
</entry>
|
||||
<entry><literal>substring('Thomas' from 'mas$' for <optional>escape '\\'</optional>)</literal></entry>
|
||||
<entry><literal>substring('Thomas' from '...$')</literal></entry>
|
||||
<entry><literal>mas</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>substring</function>(<parameter>string</parameter> from <replaceable>pattern</replaceable> for <replaceable>escape</replaceable>)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>
|
||||
extract substring matching SQL99 regular expression
|
||||
<indexterm>
|
||||
<primary>substring</primary>
|
||||
</indexterm>
|
||||
</entry>
|
||||
<entry><literal>substring('Thomas' from '%#"o_a#"_' for '#')</literal></entry>
|
||||
<entry><literal>oma</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>
|
||||
<function>trim</function>(<optional>leading | trailing | both</optional>
|
||||
@ -960,6 +973,328 @@ PostgreSQL documentation
|
||||
</tgroup>
|
||||
</table>
|
||||
|
||||
<para>
|
||||
Additional string manipulation functions are available and are
|
||||
listed below. Some of them are used internally to implement the
|
||||
<acronym>SQL</acronym>-standard string functions listed above.
|
||||
</para>
|
||||
|
||||
<table id="functions-string-other">
|
||||
<title>Other String Functions</title>
|
||||
<tgroup cols="5">
|
||||
<thead>
|
||||
<row>
|
||||
<entry>Function</entry>
|
||||
<entry>Return Type</entry>
|
||||
<entry>Description</entry>
|
||||
<entry>Example</entry>
|
||||
<entry>Result</entry>
|
||||
</row>
|
||||
</thead>
|
||||
|
||||
<tbody>
|
||||
<row>
|
||||
<entry><function>ascii</function>(<type>text</type>)</entry>
|
||||
<entry>integer</entry>
|
||||
<entry>Returns the <acronym>ASCII</acronym> code of the first character of the argument.</entry>
|
||||
<entry><literal>ascii('x')</literal></entry>
|
||||
<entry><literal>120</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>btrim</function>(<parameter>string</parameter> <type>text</type>, <parameter>trim</parameter> <type>text</type>)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>
|
||||
Remove (trim) the longest string consisting only of characters
|
||||
in <parameter>trim</parameter> from the start and end of
|
||||
<parameter>string</parameter>.
|
||||
</entry>
|
||||
<entry><literal>btrim('xyxtrimyyx','xy')</literal></entry>
|
||||
<entry><literal>trim</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>chr</function>(<type>integer</type>)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>Returns the character with the given <acronym>ASCII</acronym> code.</entry>
|
||||
<entry><literal>chr(65)</literal></entry>
|
||||
<entry><literal>A</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>
|
||||
<function>convert</function>(<parameter>string</parameter>
|
||||
<type>text</type>,
|
||||
<optional><parameter>src_encoding</parameter> <type>name</type>,</optional>
|
||||
<parameter>dest_encoding</parameter> <type>name</type>)
|
||||
</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>
|
||||
Converts string using <parameter>dest_encoding</parameter>.
|
||||
The original encoding is specified by
|
||||
<parameter>src_encoding</parameter>. If
|
||||
<parameter>src_encoding</parameter> is omitted, database
|
||||
encoding is assumed.
|
||||
</entry>
|
||||
<entry><literal>convert('text_in_unicode', 'UNICODE', 'LATIN1')</literal></entry>
|
||||
<entry><literal>text_in_unicode</literal> represented in ISO 8859-1</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>
|
||||
<function>decode</function>(<parameter>string</parameter> <type>text</type>,
|
||||
<parameter>type</parameter> <type>text</type>)
|
||||
</entry>
|
||||
<entry><type>bytea</type></entry>
|
||||
<entry>
|
||||
Decodes binary data from <parameter>string</parameter> previously
|
||||
encoded with encode(). Parameter type is same as in encode().
|
||||
</entry>
|
||||
<entry><literal>decode('MTIzAAE=', 'base64')</literal></entry>
|
||||
<entry><literal>123\000\001</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>
|
||||
<function>encode</function>(<parameter>data</parameter> <type>bytea</type>,
|
||||
<parameter>type</parameter> <type>text</type>)
|
||||
</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>
|
||||
Encodes binary data to <acronym>ASCII</acronym>-only representation. Supported
|
||||
types are: 'base64', 'hex', 'escape'.
|
||||
</entry>
|
||||
<entry><literal>encode('123\\000\\001', 'base64')</literal></entry>
|
||||
<entry><literal>MTIzAAE=</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>initcap</function>(<type>text</type>)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>Converts first letter of each word (whitespace separated) to upper case.</entry>
|
||||
<entry><literal>initcap('hi thomas')</literal></entry>
|
||||
<entry><literal>Hi Thomas</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>length</function>(<parameter>string</parameter>)</entry>
|
||||
<entry><type>integer</type></entry>
|
||||
<entry>
|
||||
length of string
|
||||
<indexterm>
|
||||
<primary>character strings</primary>
|
||||
<secondary>length</secondary>
|
||||
</indexterm>
|
||||
<indexterm>
|
||||
<primary>length</primary>
|
||||
<secondary>character strings</secondary>
|
||||
<see>character strings, length</see>
|
||||
</indexterm>
|
||||
</entry>
|
||||
<entry><literal>length('jose')</literal></entry>
|
||||
<entry><literal>4</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>
|
||||
<function>lpad</function>(<parameter>string</parameter> <type>text</type>,
|
||||
<parameter>length</parameter> <type>integer</type>
|
||||
<optional>, <parameter>fill</parameter> <type>text</type></optional>)
|
||||
</entry>
|
||||
<entry>text</entry>
|
||||
<entry>
|
||||
Fills up the <parameter>string</parameter> to length
|
||||
<parameter>length</parameter> by prepending the characters
|
||||
<parameter>fill</parameter> (a space by default). If the
|
||||
<parameter>string</parameter> is already longer than
|
||||
<parameter>length</parameter> then it is truncated (on the
|
||||
right).
|
||||
</entry>
|
||||
<entry><literal>lpad('hi', 5, 'xy')</literal></entry>
|
||||
<entry><literal>xyxhi</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>ltrim</function>(<parameter>string</parameter> <type>text</type>, <parameter>text</parameter> <type>text</type>)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>
|
||||
Removes the longest string containing only characters from
|
||||
<parameter>trim</parameter> from the start of the string.
|
||||
</entry>
|
||||
<entry><literal>ltrim('zzzytrim','xyz')</literal></entry>
|
||||
<entry><literal>trim</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>pg_client_encoding</function>()</entry>
|
||||
<entry><type>name</type></entry>
|
||||
<entry>
|
||||
Returns current client encoding name.
|
||||
</entry>
|
||||
<entry><literal>pg_client_encoding()</literal></entry>
|
||||
<entry><literal>SQL_ASCII</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>quote_ident</function>(<parameter>string</parameter> text)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>
|
||||
Returns the given string suitably quoted to be used as an identifier
|
||||
in an SQL query string.
|
||||
Quotes are added only if necessary (i.e., if the string contains
|
||||
non-identifier characters or would be case-folded).
|
||||
Embedded quotes are properly doubled.
|
||||
</entry>
|
||||
<entry><literal>quote_ident('Foo')</literal></entry>
|
||||
<entry><literal>"Foo"</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>quote_literal</function>(<parameter>string</parameter> text)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>
|
||||
Returns the given string suitably quoted to be used as a literal
|
||||
in an SQL query string.
|
||||
Embedded quotes and backslashes are properly doubled.
|
||||
</entry>
|
||||
<entry><literal>quote_literal('O\'Reilly')</literal></entry>
|
||||
<entry><literal>'O''Reilly'</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>repeat</function>(<type>text</type>, <type>integer</type>)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>Repeat text a number of times.</entry>
|
||||
<entry><literal>repeat('Pg', 4)</literal></entry>
|
||||
<entry><literal>PgPgPgPg</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>replace</function>(<parameter>string</parameter> <type>text</type>,
|
||||
<parameter>from</parameter> <type>text</type>,
|
||||
<parameter>to</parameter> <type>text</type>)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>Replace all occurrences in <parameter>string</parameter> of substring
|
||||
<parameter>from</parameter> with substring <parameter>to</parameter>
|
||||
</entry>
|
||||
<entry><literal>replace('abcdefabcdef', 'cd', 'XX')</literal></entry>
|
||||
<entry><literal>abXXefabXXef</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>
|
||||
<function>rpad</function>(<parameter>string</parameter> <type>text</type>,
|
||||
<parameter>length</parameter> <type>integer</type>
|
||||
<optional>, <parameter>fill</parameter> <type>text</type></optional>)
|
||||
</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>
|
||||
Fills up the <parameter>string</parameter> to length
|
||||
<parameter>length</parameter> by appending the characters
|
||||
<parameter>fill</parameter> (a space by default). If the
|
||||
<parameter>string</parameter> is already longer than
|
||||
<parameter>length</parameter> then it is truncated.
|
||||
</entry>
|
||||
<entry><literal>rpad('hi', 5, 'xy')</literal></entry>
|
||||
<entry><literal>hixyx</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>rtrim</function>(<parameter>string</parameter>
|
||||
text, <parameter>trim</parameter> text)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>
|
||||
Removes the longest string containing only characters from
|
||||
<parameter>trim</parameter> from the end of the string.
|
||||
</entry>
|
||||
<entry><literal>rtrim('trimxxxx','x')</literal></entry>
|
||||
<entry><literal>trim</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>split_part</function>(<parameter>string</parameter> <type>text</type>,
|
||||
<parameter>delimiter</parameter> <type>text</type>,
|
||||
<parameter>column</parameter> <type>integer</type>)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>Split <parameter>string</parameter> on <parameter>delimiter</parameter>
|
||||
returning the resulting (one based) <parameter>column</parameter> number.
|
||||
</entry>
|
||||
<entry><literal>split_part('abc~@~def~@~ghi','~@~',2)</literal></entry>
|
||||
<entry><literal>def</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>strpos</function>(<parameter>string</parameter>, <parameter>substring</parameter>)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>
|
||||
Locates specified substring. (same as
|
||||
<literal>position(<parameter>substring</parameter> in
|
||||
<parameter>string</parameter>)</literal>, but note the reversed
|
||||
argument order)
|
||||
</entry>
|
||||
<entry><literal>strpos('high','ig')</literal></entry>
|
||||
<entry><literal>2</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>substr</function>(<parameter>string</parameter>, <parameter>from</parameter> <optional>, <parameter>count</parameter></optional>)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>
|
||||
Extracts specified substring. (same as
|
||||
<literal>substring(<parameter>string</parameter> from <parameter>from</parameter> for <parameter>count</parameter>)</literal>)
|
||||
</entry>
|
||||
<entry><literal>substr('alphabet', 3, 2)</literal></entry>
|
||||
<entry><literal>ph</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>to_ascii</function>(<type>text</type>
|
||||
<optional>, <parameter>encoding</parameter></optional>)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>Converts text from multibyte encoding to <acronym>ASCII</acronym>.</entry>
|
||||
<entry><literal>to_ascii('Karel')</literal></entry>
|
||||
<entry><literal>Karel</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>to_hex</function>(<parameter>number</parameter> <type>integer</type>
|
||||
or <type>bigint</type>)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>Convert <parameter>number</parameter> to its equivalent hexadecimal
|
||||
representation.
|
||||
</entry>
|
||||
<entry><literal>to_hex(9223372036854775807::bigint)</literal></entry>
|
||||
<entry><literal>7fffffffffffffff</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>
|
||||
<function>translate</function>(<parameter>string</parameter>
|
||||
<type>text</type>,
|
||||
<parameter>from</parameter> <type>text</type>,
|
||||
<parameter>to</parameter> <type>text</type>)
|
||||
</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>
|
||||
Any character in <parameter>string</parameter> that matches a
|
||||
character in the <parameter>from</parameter> set is replaced by
|
||||
the corresponding character in the <parameter>to</parameter>
|
||||
set.
|
||||
</entry>
|
||||
<entry><literal>translate('12345', '14', 'ax')</literal></entry>
|
||||
<entry><literal>a23x5</literal></entry>
|
||||
</row>
|
||||
|
||||
</tbody>
|
||||
</tgroup>
|
||||
</table>
|
||||
|
||||
<para>
|
||||
The <function>to_ascii</function> function supports conversion from
|
||||
LATIN1, LATIN2, WIN1250 (CP1250) only.
|
||||
</para>
|
||||
|
||||
|
||||
<table id="conversion-names">
|
||||
<title>Available conversion names</title>
|
||||
<tgroup cols="3">
|
||||
@ -1660,326 +1995,6 @@ PostgreSQL documentation
|
||||
</tgroup>
|
||||
</table>
|
||||
|
||||
<para>
|
||||
Additional string manipulation functions are available and are
|
||||
listed below. Some of them are used internally to implement the
|
||||
<acronym>SQL</acronym>-standard string functions listed above.
|
||||
</para>
|
||||
|
||||
<table id="functions-string-other">
|
||||
<title>Other String Functions</title>
|
||||
<tgroup cols="5">
|
||||
<thead>
|
||||
<row>
|
||||
<entry>Function</entry>
|
||||
<entry>Return Type</entry>
|
||||
<entry>Description</entry>
|
||||
<entry>Example</entry>
|
||||
<entry>Result</entry>
|
||||
</row>
|
||||
</thead>
|
||||
|
||||
<tbody>
|
||||
<row>
|
||||
<entry><function>ascii</function>(<type>text</type>)</entry>
|
||||
<entry>integer</entry>
|
||||
<entry>Returns the <acronym>ASCII</acronym> code of the first character of the argument.</entry>
|
||||
<entry><literal>ascii('x')</literal></entry>
|
||||
<entry><literal>120</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>btrim</function>(<parameter>string</parameter> <type>text</type>, <parameter>trim</parameter> <type>text</type>)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>
|
||||
Remove (trim) the longest string consisting only of characters
|
||||
in <parameter>trim</parameter> from the start and end of
|
||||
<parameter>string</parameter>.
|
||||
</entry>
|
||||
<entry><literal>btrim('xyxtrimyyx','xy')</literal></entry>
|
||||
<entry><literal>trim</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>chr</function>(<type>integer</type>)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>Returns the character with the given <acronym>ASCII</acronym> code.</entry>
|
||||
<entry><literal>chr(65)</literal></entry>
|
||||
<entry><literal>A</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>
|
||||
<function>convert</function>(<parameter>string</parameter>
|
||||
<type>text</type>,
|
||||
<optional><parameter>src_encoding</parameter> <type>name</type>,</optional>
|
||||
<parameter>dest_encoding</parameter> <type>name</type>)
|
||||
</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>
|
||||
Converts string using <parameter>dest_encoding</parameter>.
|
||||
The original encoding is specified by
|
||||
<parameter>src_encoding</parameter>. If
|
||||
<parameter>src_encoding</parameter> is omitted, database
|
||||
encoding is assumed.
|
||||
</entry>
|
||||
<entry><literal>convert('text_in_unicode', 'UNICODE', 'LATIN1')</literal></entry>
|
||||
<entry><literal>text_in_unicode</literal> represented in ISO 8859-1</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>
|
||||
<function>decode</function>(<parameter>string</parameter> <type>text</type>,
|
||||
<parameter>type</parameter> <type>text</type>)
|
||||
</entry>
|
||||
<entry><type>bytea</type></entry>
|
||||
<entry>
|
||||
Decodes binary data from <parameter>string</parameter> previously
|
||||
encoded with encode(). Parameter type is same as in encode().
|
||||
</entry>
|
||||
<entry><literal>decode('MTIzAAE=', 'base64')</literal></entry>
|
||||
<entry><literal>123\000\001</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>
|
||||
<function>encode</function>(<parameter>data</parameter> <type>bytea</type>,
|
||||
<parameter>type</parameter> <type>text</type>)
|
||||
</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>
|
||||
Encodes binary data to <acronym>ASCII</acronym>-only representation. Supported
|
||||
types are: 'base64', 'hex', 'escape'.
|
||||
</entry>
|
||||
<entry><literal>encode('123\\000\\001', 'base64')</literal></entry>
|
||||
<entry><literal>MTIzAAE=</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>initcap</function>(<type>text</type>)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>Converts first letter of each word (whitespace separated) to upper case.</entry>
|
||||
<entry><literal>initcap('hi thomas')</literal></entry>
|
||||
<entry><literal>Hi Thomas</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>length</function>(<parameter>string</parameter>)</entry>
|
||||
<entry><type>integer</type></entry>
|
||||
<entry>
|
||||
length of string
|
||||
<indexterm>
|
||||
<primary>character strings</primary>
|
||||
<secondary>length</secondary>
|
||||
</indexterm>
|
||||
<indexterm>
|
||||
<primary>length</primary>
|
||||
<secondary>character strings</secondary>
|
||||
<see>character strings, length</see>
|
||||
</indexterm>
|
||||
</entry>
|
||||
<entry><literal>length('jose')</literal></entry>
|
||||
<entry><literal>4</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>
|
||||
<function>lpad</function>(<parameter>string</parameter> <type>text</type>,
|
||||
<parameter>length</parameter> <type>integer</type>
|
||||
<optional>, <parameter>fill</parameter> <type>text</type></optional>)
|
||||
</entry>
|
||||
<entry>text</entry>
|
||||
<entry>
|
||||
Fills up the <parameter>string</parameter> to length
|
||||
<parameter>length</parameter> by prepending the characters
|
||||
<parameter>fill</parameter> (a space by default). If the
|
||||
<parameter>string</parameter> is already longer than
|
||||
<parameter>length</parameter> then it is truncated (on the
|
||||
right).
|
||||
</entry>
|
||||
<entry><literal>lpad('hi', 5, 'xy')</literal></entry>
|
||||
<entry><literal>xyxhi</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>ltrim</function>(<parameter>string</parameter> <type>text</type>, <parameter>text</parameter> <type>text</type>)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>
|
||||
Removes the longest string containing only characters from
|
||||
<parameter>trim</parameter> from the start of the string.
|
||||
</entry>
|
||||
<entry><literal>ltrim('zzzytrim','xyz')</literal></entry>
|
||||
<entry><literal>trim</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>pg_client_encoding</function>()</entry>
|
||||
<entry><type>name</type></entry>
|
||||
<entry>
|
||||
Returns current client encoding name.
|
||||
</entry>
|
||||
<entry><literal>pg_client_encoding()</literal></entry>
|
||||
<entry><literal>SQL_ASCII</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>quote_ident</function>(<parameter>string</parameter> text)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>
|
||||
Returns the given string suitably quoted to be used as an identifier
|
||||
in an SQL query string.
|
||||
Quotes are added only if necessary (i.e., if the string contains
|
||||
non-identifier characters or would be case-folded).
|
||||
Embedded quotes are properly doubled.
|
||||
</entry>
|
||||
<entry><literal>quote_ident('Foo')</literal></entry>
|
||||
<entry><literal>"Foo"</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>quote_literal</function>(<parameter>string</parameter> text)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>
|
||||
Returns the given string suitably quoted to be used as a literal
|
||||
in an SQL query string.
|
||||
Embedded quotes and backslashes are properly doubled.
|
||||
</entry>
|
||||
<entry><literal>quote_literal('O\'Reilly')</literal></entry>
|
||||
<entry><literal>'O''Reilly'</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>repeat</function>(<type>text</type>, <type>integer</type>)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>Repeat text a number of times.</entry>
|
||||
<entry><literal>repeat('Pg', 4)</literal></entry>
|
||||
<entry><literal>PgPgPgPg</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>replace</function>(<parameter>string</parameter> <type>text</type>,
|
||||
<parameter>from</parameter> <type>text</type>,
|
||||
<parameter>to</parameter> <type>text</type>)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>Replace all occurrences in <parameter>string</parameter> of substring
|
||||
<parameter>from</parameter> with substring <parameter>to</parameter>
|
||||
</entry>
|
||||
<entry><literal>replace('abcdefabcdef', 'cd', 'XX')</literal></entry>
|
||||
<entry><literal>abXXefabXXef</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>
|
||||
<function>rpad</function>(<parameter>string</parameter> <type>text</type>,
|
||||
<parameter>length</parameter> <type>integer</type>
|
||||
<optional>, <parameter>fill</parameter> <type>text</type></optional>)
|
||||
</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>
|
||||
Fills up the <parameter>string</parameter> to length
|
||||
<parameter>length</parameter> by appending the characters
|
||||
<parameter>fill</parameter> (a space by default). If the
|
||||
<parameter>string</parameter> is already longer than
|
||||
<parameter>length</parameter> then it is truncated.
|
||||
</entry>
|
||||
<entry><literal>rpad('hi', 5, 'xy')</literal></entry>
|
||||
<entry><literal>hixyx</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>rtrim</function>(<parameter>string</parameter>
|
||||
text, <parameter>trim</parameter> text)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>
|
||||
Removes the longest string containing only characters from
|
||||
<parameter>trim</parameter> from the end of the string.
|
||||
</entry>
|
||||
<entry><literal>rtrim('trimxxxx','x')</literal></entry>
|
||||
<entry><literal>trim</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>split_part</function>(<parameter>string</parameter> <type>text</type>,
|
||||
<parameter>delimiter</parameter> <type>text</type>,
|
||||
<parameter>column</parameter> <type>integer</type>)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>Split <parameter>string</parameter> on <parameter>delimiter</parameter>
|
||||
returning the resulting (one based) <parameter>column</parameter> number.
|
||||
</entry>
|
||||
<entry><literal>split_part('abc~@~def~@~ghi','~@~',2)</literal></entry>
|
||||
<entry><literal>def</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>strpos</function>(<parameter>string</parameter>, <parameter>substring</parameter>)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>
|
||||
Locates specified substring. (same as
|
||||
<literal>position(<parameter>substring</parameter> in
|
||||
<parameter>string</parameter>)</literal>, but note the reversed
|
||||
argument order)
|
||||
</entry>
|
||||
<entry><literal>strpos('high','ig')</literal></entry>
|
||||
<entry><literal>2</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>substr</function>(<parameter>string</parameter>, <parameter>from</parameter> <optional>, <parameter>count</parameter></optional>)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>
|
||||
Extracts specified substring. (same as
|
||||
<literal>substring(<parameter>string</parameter> from <parameter>from</parameter> for <parameter>count</parameter>)</literal>)
|
||||
</entry>
|
||||
<entry><literal>substr('alphabet', 3, 2)</literal></entry>
|
||||
<entry><literal>ph</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>to_ascii</function>(<type>text</type>
|
||||
<optional>, <parameter>encoding</parameter></optional>)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>Converts text from multibyte encoding to <acronym>ASCII</acronym>.</entry>
|
||||
<entry><literal>to_ascii('Karel')</literal></entry>
|
||||
<entry><literal>Karel</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>to_hex</function>(<parameter>number</parameter> <type>integer</type>
|
||||
or <type>bigint</type>)</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>Convert <parameter>number</parameter> to its equivalent hexadecimal
|
||||
representation.
|
||||
</entry>
|
||||
<entry><literal>to_hex(9223372036854775807::bigint)</literal></entry>
|
||||
<entry><literal>7fffffffffffffff</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>
|
||||
<function>translate</function>(<parameter>string</parameter>
|
||||
<type>text</type>,
|
||||
<parameter>from</parameter> <type>text</type>,
|
||||
<parameter>to</parameter> <type>text</type>)
|
||||
</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>
|
||||
Any character in <parameter>string</parameter> that matches a
|
||||
character in the <parameter>from</parameter> set is replaced by
|
||||
the corresponding character in the <parameter>to</parameter>
|
||||
set.
|
||||
</entry>
|
||||
<entry><literal>translate('12345', '14', 'ax')</literal></entry>
|
||||
<entry><literal>a23x5</literal></entry>
|
||||
</row>
|
||||
|
||||
</tbody>
|
||||
</tgroup>
|
||||
</table>
|
||||
|
||||
<para>
|
||||
The <function>to_ascii</function> function supports conversion from
|
||||
LATIN1, LATIN2, WIN1250 (CP1250) only.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
|
||||
@ -2171,16 +2186,16 @@ PostgreSQL documentation
|
||||
<title>Pattern Matching</title>
|
||||
|
||||
<para>
|
||||
There are two separate approaches to pattern matching provided by
|
||||
There are three separate approaches to pattern matching provided by
|
||||
<productname>PostgreSQL</productname>: the traditional
|
||||
<acronym>SQL</acronym>
|
||||
<function>LIKE</function> operator and the more recent
|
||||
<function>LIKE</function> operator, the more recent
|
||||
<acronym>SQL99</acronym>
|
||||
<function>SIMILAR TO</function> operator implementing
|
||||
<function>SIMILAR TO</function> operator, and
|
||||
<acronym>POSIX</acronym>-style regular expressions.
|
||||
Additionally, a pattern matching function,
|
||||
<function>SUBSTRING</function>, is available, as defined in
|
||||
<acronym>SQL99</acronym>.
|
||||
<function>SUBSTRING</function>, is available, using either
|
||||
<acronym>SQL99</acronym>-style or POSIX-style regular expressions.
|
||||
</para>
|
||||
|
||||
<tip>
|
||||
@ -2190,13 +2205,6 @@ PostgreSQL documentation
|
||||
</para>
|
||||
</tip>
|
||||
|
||||
<para>
|
||||
Both <function>LIKE</function> and <function>SIMILAR TO</function>
|
||||
are SQL-standard operators which are also available in alternate
|
||||
forms as <productname>PostgreSQL</productname> operators; look at
|
||||
<literal>~</literal> and <literal>~~</literal> for examples.
|
||||
</para>
|
||||
|
||||
<sect2 id="functions-like">
|
||||
<title><function>LIKE</function></title>
|
||||
|
||||
@ -2296,11 +2304,142 @@ PostgreSQL documentation
|
||||
</sect2>
|
||||
|
||||
|
||||
<sect2 id="functions-regexp">
|
||||
<title><function>SIMILAR TO</function> and <acronym>POSIX</acronym>
|
||||
<sect2 id="functions-sql99-regexp">
|
||||
<title><function>SIMILAR TO</function> and <acronym>SQL99</acronym>
|
||||
Regular Expressions</title>
|
||||
|
||||
<indexterm zone="functions-regexp">
|
||||
<indexterm zone="functions-sql99-regexp">
|
||||
<primary>regular expressions</primary>
|
||||
<seealso>pattern matching</seealso>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>similar to</primary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>substring</primary>
|
||||
</indexterm>
|
||||
|
||||
<synopsis>
|
||||
<replaceable>string</replaceable> SIMILAR TO <replaceable>pattern</replaceable> <optional>ESCAPE <replaceable>escape-character</replaceable></optional>
|
||||
<replaceable>string</replaceable> NOT SIMILAR TO <replaceable>pattern</replaceable> <optional>ESCAPE <replaceable>escape-character</replaceable></optional>
|
||||
</synopsis>
|
||||
|
||||
<para>
|
||||
The <function>SIMILAR TO</function> operator returns true or false
|
||||
depending on whether its pattern matches the given string. It is
|
||||
much like <function>LIKE</function>, except that it interprets the
|
||||
pattern using <acronym>SQL99</acronym>'s definition of a regular
|
||||
expression.
|
||||
<acronym>SQL99</acronym>'s regular expressions are a curious cross
|
||||
between <function>LIKE</function> notation and common regular expression
|
||||
notation.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Like <function>LIKE</function>, the <function>SIMILAR TO</function>
|
||||
operator succeeds only if its pattern matches the entire string;
|
||||
this is unlike common regular expression practice, wherein the pattern
|
||||
may match any part of the string.
|
||||
Also like
|
||||
<function>LIKE</function>, <function>SIMILAR TO</function> uses
|
||||
<literal>%</> and <literal>_</> as wildcard characters denoting
|
||||
any string and any single character, respectively (these are
|
||||
comparable to <literal>.*</> and <literal>.</> in POSIX regular
|
||||
expressions).
|
||||
</para>
|
||||
|
||||
<para>
|
||||
In addition to these facilities borrowed from <function>LIKE</function>,
|
||||
<function>SIMILAR TO</function> supports these pattern-matching
|
||||
metacharacters borrowed from POSIX regular expressions:
|
||||
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<para>
|
||||
<literal>|</literal> denotes alternation (either of two alternatives).
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
<literal>*</literal> denotes repetition of the previous item zero
|
||||
or more times.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
<literal>+</literal> denotes repetition of the previous item one
|
||||
or more times.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
Parentheses <literal>()</literal> may be used to group items into
|
||||
a single logical item.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
A bracket expression <literal>[...]</literal> specifies a character
|
||||
class, just as in POSIX regular expressions.
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
|
||||
Notice that bounded repetition (<literal>?</> and <literal>{...}</>)
|
||||
are not provided, though they exist in POSIX. Also, dot (<literal>.</>)
|
||||
is not a metacharacter.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
As with <function>LIKE</>, a backslash disables the special meaning
|
||||
of any of these metacharacters; or a different escape character can
|
||||
be specified with <literal>ESCAPE</>.
|
||||
</para>
|
||||
|
||||
<informalexample>
|
||||
<para>
|
||||
Some examples:
|
||||
<programlisting>
|
||||
'abc' SIMILAR TO 'abc' <lineannotation>true</lineannotation>
|
||||
'abc' SIMILAR TO 'a' <lineannotation>false</lineannotation>
|
||||
'abc' SIMILAR TO '%(b|d)%' <lineannotation>true</lineannotation>
|
||||
'abc' SIMILAR TO '(b|c)%' <lineannotation>false</lineannotation>
|
||||
</programlisting>
|
||||
</para>
|
||||
</informalexample>
|
||||
|
||||
<para>
|
||||
The SUBSTRING function with three parameters,
|
||||
<function>SUBSTRING</function>(<parameter>string</parameter> FROM
|
||||
<replaceable>pattern</replaceable> FOR
|
||||
<replaceable>escape</replaceable>), provides extraction of a substring
|
||||
that matches a SQL99 regular expression pattern. As with SIMILAR TO,
|
||||
the specified pattern must match to the entire data string, else the
|
||||
function fails and returns NULL. To indicate the part of the pattern
|
||||
that should be returned on success, SQL99 specifies that the pattern
|
||||
must contain two occurrences of the escape character followed by
|
||||
double quote (<literal>"</>). The text matching the portion of the
|
||||
pattern between these markers is returned.
|
||||
</para>
|
||||
|
||||
<informalexample>
|
||||
<para>
|
||||
Some examples:
|
||||
<programlisting>
|
||||
SUBSTRING('foobar' FROM '%#"o_b#"%' FOR '#') <lineannotation>oob</lineannotation>
|
||||
SUBSTRING('foobar' FROM '#"o_b#"%' FOR '#') <lineannotation>NULL</lineannotation>
|
||||
</programlisting>
|
||||
</para>
|
||||
</informalexample>
|
||||
|
||||
</sect2>
|
||||
|
||||
<sect2 id="functions-posix-regexp">
|
||||
<title><acronym>POSIX</acronym> Regular Expressions</title>
|
||||
|
||||
<indexterm zone="functions-posix-regexp">
|
||||
<primary>regular expressions</primary>
|
||||
<seealso>pattern matching</seealso>
|
||||
</indexterm>
|
||||
@ -2341,12 +2480,6 @@ PostgreSQL documentation
|
||||
<entry>Does not match regular expression, case insensitive</entry>
|
||||
<entry><literal>'thomas' !~* '.*vadim.*'</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry> <literal>SIMILAR TO</literal> </entry>
|
||||
<entry>Matches regular expression, case sensitive</entry>
|
||||
<entry><literal>'thomas' SIMILAR TO '.*thomas.*'</literal></entry>
|
||||
</row>
|
||||
</tbody>
|
||||
</tgroup>
|
||||
</table>
|
||||
@ -2354,7 +2487,8 @@ PostgreSQL documentation
|
||||
<para>
|
||||
<acronym>POSIX</acronym> regular expressions provide a more
|
||||
powerful means for
|
||||
pattern matching than the <function>LIKE</function> function.
|
||||
pattern matching than the <function>LIKE</function> and
|
||||
<function>SIMILAR TO</> operators.
|
||||
Many Unix tools such as <command>egrep</command>,
|
||||
<command>sed</command>, or <command>awk</command> use a pattern
|
||||
matching language that is similar to the one described here.
|
||||
@ -2379,10 +2513,34 @@ PostgreSQL documentation
|
||||
<para>
|
||||
Some examples:
|
||||
<programlisting>
|
||||
'abc' SIMILAR TO 'abc' <lineannotation>true</lineannotation>
|
||||
'abc' SIMILAR TO '^a' <lineannotation>true</lineannotation>
|
||||
'abc' SIMILAR TO '(b|d)' <lineannotation>true</lineannotation>
|
||||
'abc' SIMILAR TO '^(b|c)' <lineannotation>false</lineannotation>
|
||||
'abc' ~ 'abc' <lineannotation>true</lineannotation>
|
||||
'abc' ~ '^a' <lineannotation>true</lineannotation>
|
||||
'abc' ~ '(b|d)' <lineannotation>true</lineannotation>
|
||||
'abc' ~ '^(b|c)' <lineannotation>false</lineannotation>
|
||||
</programlisting>
|
||||
</para>
|
||||
</informalexample>
|
||||
|
||||
<para>
|
||||
The SUBSTRING function with two parameters,
|
||||
<function>SUBSTRING</function>(<parameter>string</parameter> FROM
|
||||
<replaceable>pattern</replaceable>), provides extraction of a substring
|
||||
that matches a POSIX regular expression pattern. It returns NULL if
|
||||
there is no match, otherwise the portion of the text that matched the
|
||||
pattern. But if the pattern contains any parentheses, the portion
|
||||
of the text that matched the first parenthesized subexpression (the
|
||||
one whose left parenthesis comes first) is
|
||||
returned. You can always put parentheses around the whole expression
|
||||
if you want to use parentheses within it without triggering this
|
||||
exception.
|
||||
</para>
|
||||
|
||||
<informalexample>
|
||||
<para>
|
||||
Some examples:
|
||||
<programlisting>
|
||||
SUBSTRING('foobar' FROM 'o.b') <lineannotation>oob</lineannotation>
|
||||
SUBSTRING('foobar' FROM 'o(.)b') <lineannotation>o</lineannotation>
|
||||
</programlisting>
|
||||
</para>
|
||||
</informalexample>
|
||||
|
@ -11,7 +11,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.367 2002/09/18 21:35:21 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.368 2002/09/22 17:27:23 tgl Exp $
|
||||
*
|
||||
* HISTORY
|
||||
* AUTHOR DATE MAJOR EVENT
|
||||
@ -5644,22 +5644,40 @@ a_expr: c_expr { $$ = $1; }
|
||||
}
|
||||
|
||||
| a_expr SIMILAR TO a_expr %prec SIMILAR
|
||||
{ $$ = (Node *) makeSimpleA_Expr(OP, "~", $1, $4); }
|
||||
{
|
||||
A_Const *c = makeNode(A_Const);
|
||||
FuncCall *n = makeNode(FuncCall);
|
||||
c->val.type = T_Null;
|
||||
n->funcname = SystemFuncName("similar_escape");
|
||||
n->args = makeList2($4, (Node *) c);
|
||||
n->agg_star = FALSE;
|
||||
n->agg_distinct = FALSE;
|
||||
$$ = (Node *) makeSimpleA_Expr(OP, "~", $1, (Node *) n);
|
||||
}
|
||||
| a_expr SIMILAR TO a_expr ESCAPE a_expr
|
||||
{
|
||||
FuncCall *n = makeNode(FuncCall);
|
||||
n->funcname = SystemFuncName("like_escape");
|
||||
n->funcname = SystemFuncName("similar_escape");
|
||||
n->args = makeList2($4, $6);
|
||||
n->agg_star = FALSE;
|
||||
n->agg_distinct = FALSE;
|
||||
$$ = (Node *) makeSimpleA_Expr(OP, "~", $1, (Node *) n);
|
||||
}
|
||||
| a_expr NOT SIMILAR TO a_expr %prec SIMILAR
|
||||
{ $$ = (Node *) makeSimpleA_Expr(OP, "!~", $1, $5); }
|
||||
{
|
||||
A_Const *c = makeNode(A_Const);
|
||||
FuncCall *n = makeNode(FuncCall);
|
||||
c->val.type = T_Null;
|
||||
n->funcname = SystemFuncName("similar_escape");
|
||||
n->args = makeList2($5, (Node *) c);
|
||||
n->agg_star = FALSE;
|
||||
n->agg_distinct = FALSE;
|
||||
$$ = (Node *) makeSimpleA_Expr(OP, "!~", $1, (Node *) n);
|
||||
}
|
||||
| a_expr NOT SIMILAR TO a_expr ESCAPE a_expr
|
||||
{
|
||||
FuncCall *n = makeNode(FuncCall);
|
||||
n->funcname = SystemFuncName("like_escape");
|
||||
n->funcname = SystemFuncName("similar_escape");
|
||||
n->args = makeList2($5, $7);
|
||||
n->agg_star = FALSE;
|
||||
n->agg_distinct = FALSE;
|
||||
|
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/utils/adt/regexp.c,v 1.42 2002/09/04 20:31:28 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/utils/adt/regexp.c,v 1.43 2002/09/22 17:27:23 tgl Exp $
|
||||
*
|
||||
* Alistair Crooks added the code for the regex caching
|
||||
* agc - cached the regular expressions used - there's a good chance
|
||||
@ -317,8 +317,7 @@ textregexsubstr(PG_FUNCTION_ARGS)
|
||||
char *sterm;
|
||||
int len;
|
||||
bool match;
|
||||
int nmatch = 1;
|
||||
regmatch_t pmatch;
|
||||
regmatch_t pmatch[2];
|
||||
|
||||
/* be sure sterm is null-terminated */
|
||||
len = VARSIZE(s) - VARHDRSZ;
|
||||
@ -327,21 +326,131 @@ textregexsubstr(PG_FUNCTION_ARGS)
|
||||
sterm[len] = '\0';
|
||||
|
||||
/*
|
||||
* We need the match info back from the pattern match to be able to
|
||||
* actually extract the substring. It seems to be adequate to pass in
|
||||
* a structure to return only one result.
|
||||
* We pass two regmatch_t structs to get info about the overall match
|
||||
* and the match for the first parenthesized subexpression (if any).
|
||||
* If there is a parenthesized subexpression, we return what it matched;
|
||||
* else return what the whole regexp matched.
|
||||
*/
|
||||
match = RE_compile_and_execute(p, sterm, REG_EXTENDED, nmatch, &pmatch);
|
||||
match = RE_compile_and_execute(p, sterm, REG_EXTENDED, 2, pmatch);
|
||||
|
||||
pfree(sterm);
|
||||
|
||||
/* match? then return the substring matching the pattern */
|
||||
if (match)
|
||||
{
|
||||
int so,
|
||||
eo;
|
||||
|
||||
so = pmatch[1].rm_so;
|
||||
eo = pmatch[1].rm_eo;
|
||||
if (so < 0 || eo < 0)
|
||||
{
|
||||
/* no parenthesized subexpression */
|
||||
so = pmatch[0].rm_so;
|
||||
eo = pmatch[0].rm_eo;
|
||||
}
|
||||
|
||||
return (DirectFunctionCall3(text_substr,
|
||||
PointerGetDatum(s),
|
||||
Int32GetDatum(pmatch.rm_so + 1),
|
||||
Int32GetDatum(pmatch.rm_eo - pmatch.rm_so)));
|
||||
Int32GetDatum(so + 1),
|
||||
Int32GetDatum(eo - so)));
|
||||
}
|
||||
|
||||
PG_RETURN_NULL();
|
||||
}
|
||||
|
||||
/* similar_escape()
|
||||
* Convert a SQL99 regexp pattern to POSIX style, so it can be used by
|
||||
* our regexp engine.
|
||||
*/
|
||||
Datum
|
||||
similar_escape(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *pat_text;
|
||||
text *esc_text;
|
||||
text *result;
|
||||
unsigned char *p,
|
||||
*e,
|
||||
*r;
|
||||
int plen,
|
||||
elen;
|
||||
bool afterescape = false;
|
||||
int nquotes = 0;
|
||||
|
||||
/* This function is not strict, so must test explicitly */
|
||||
if (PG_ARGISNULL(0))
|
||||
PG_RETURN_NULL();
|
||||
pat_text = PG_GETARG_TEXT_P(0);
|
||||
p = VARDATA(pat_text);
|
||||
plen = (VARSIZE(pat_text) - VARHDRSZ);
|
||||
if (PG_ARGISNULL(1))
|
||||
{
|
||||
/* No ESCAPE clause provided; default to backslash as escape */
|
||||
e = "\\";
|
||||
elen = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
esc_text = PG_GETARG_TEXT_P(1);
|
||||
e = VARDATA(esc_text);
|
||||
elen = (VARSIZE(esc_text) - VARHDRSZ);
|
||||
if (elen == 0)
|
||||
e = NULL; /* no escape character */
|
||||
else if (elen != 1)
|
||||
elog(ERROR, "ESCAPE string must be empty or one character");
|
||||
}
|
||||
|
||||
/* We need room for ^, $, and up to 2 output bytes per input byte */
|
||||
result = (text *) palloc(VARHDRSZ + 2 + 2 * plen);
|
||||
r = VARDATA(result);
|
||||
|
||||
*r++ = '^';
|
||||
|
||||
while (plen > 0)
|
||||
{
|
||||
unsigned char pchar = *p;
|
||||
|
||||
if (afterescape)
|
||||
{
|
||||
if (pchar == '"') /* for SUBSTRING patterns */
|
||||
*r++ = ((nquotes++ % 2) == 0) ? '(' : ')';
|
||||
else
|
||||
{
|
||||
*r++ = '\\';
|
||||
*r++ = pchar;
|
||||
}
|
||||
afterescape = false;
|
||||
}
|
||||
else if (e && pchar == *e)
|
||||
{
|
||||
/* SQL99 escape character; do not send to output */
|
||||
afterescape = true;
|
||||
}
|
||||
else if (pchar == '%')
|
||||
{
|
||||
*r++ = '.';
|
||||
*r++ = '*';
|
||||
}
|
||||
else if (pchar == '_')
|
||||
{
|
||||
*r++ = '.';
|
||||
}
|
||||
else if (pchar == '\\' || pchar == '.' || pchar == '?' ||
|
||||
pchar == '{')
|
||||
{
|
||||
*r++ = '\\';
|
||||
*r++ = pchar;
|
||||
}
|
||||
else
|
||||
{
|
||||
*r++ = pchar;
|
||||
}
|
||||
p++, plen--;
|
||||
}
|
||||
|
||||
*r++ = '$';
|
||||
|
||||
VARATT_SIZEP(result) = r - ((unsigned char *) result);
|
||||
|
||||
PG_RETURN_TEXT_P(result);
|
||||
}
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: pg_proc.h,v 1.272 2002/09/18 21:35:23 tgl Exp $
|
||||
* $Id: pg_proc.h,v 1.273 2002/09/22 17:27:23 tgl Exp $
|
||||
*
|
||||
* NOTES
|
||||
* The script catalog/genbki.sh reads this file and generates .bki
|
||||
@ -2076,6 +2076,9 @@ DESCR("convert int4 to char");
|
||||
DATA(insert OID = 1622 ( repeat PGNSP PGUID 12 f f t f i 2 25 "25 23" repeat - _null_ ));
|
||||
DESCR("replicate string int4 times");
|
||||
|
||||
DATA(insert OID = 1623 ( similar_escape PGNSP PGUID 12 f f f f i 2 25 "25 25" similar_escape - _null_ ));
|
||||
DESCR("convert SQL99 regexp pattern to POSIX style");
|
||||
|
||||
DATA(insert OID = 1624 ( mul_d_interval PGNSP PGUID 12 f f t f i 2 1186 "701 1186" mul_d_interval - _null_ ));
|
||||
|
||||
DATA(insert OID = 1633 ( texticlike PGNSP PGUID 12 f f t f i 2 16 "25 25" texticlike - _null_ ));
|
||||
@ -2087,7 +2090,7 @@ DESCR("matches LIKE expression, case-insensitive");
|
||||
DATA(insert OID = 1636 ( nameicnlike PGNSP PGUID 12 f f t f i 2 16 "19 25" nameicnlike - _null_ ));
|
||||
DESCR("does not match LIKE expression, case-insensitive");
|
||||
DATA(insert OID = 1637 ( like_escape PGNSP PGUID 12 f f t f i 2 25 "25 25" like_escape - _null_ ));
|
||||
DESCR("convert match pattern to use backslash escapes");
|
||||
DESCR("convert LIKE pattern to use backslash escapes");
|
||||
|
||||
DATA(insert OID = 1689 ( update_pg_pwd_and_pg_group PGNSP PGUID 12 f f t f v 0 2279 "" update_pg_pwd_and_pg_group - _null_ ));
|
||||
DESCR("update pg_pwd and pg_group files");
|
||||
@ -2784,7 +2787,7 @@ DESCR("matches LIKE expression");
|
||||
DATA(insert OID = 2008 ( notlike PGNSP PGUID 12 f f t f i 2 16 "17 17" byteanlike - _null_ ));
|
||||
DESCR("does not match LIKE expression");
|
||||
DATA(insert OID = 2009 ( like_escape PGNSP PGUID 12 f f t f i 2 17 "17 17" like_escape_bytea - _null_ ));
|
||||
DESCR("convert match pattern to use backslash escapes");
|
||||
DESCR("convert LIKE pattern to use backslash escapes");
|
||||
DATA(insert OID = 2010 ( length PGNSP PGUID 12 f f t f i 1 23 "17" byteaoctetlen - _null_ ));
|
||||
DESCR("octet length");
|
||||
DATA(insert OID = 2011 ( byteacat PGNSP PGUID 12 f f t f i 2 17 "17 17" byteacat - _null_ ));
|
||||
@ -2889,9 +2892,9 @@ DATA(insert OID = 2072 ( date_mi_interval PGNSP PGUID 14 f f t f i 2 1114 "1082
|
||||
DESCR("subtract");
|
||||
|
||||
DATA(insert OID = 2073 ( substring PGNSP PGUID 12 f f t f i 2 25 "25 25" textregexsubstr - _null_ ));
|
||||
DESCR("substitutes regular expression");
|
||||
DATA(insert OID = 2074 ( substring PGNSP PGUID 14 f f t f i 3 25 "25 25 25" "select substring($1, like_escape($2, $3))" - _null_ ));
|
||||
DESCR("substitutes regular expression with escape argument");
|
||||
DESCR("extracts text matching regular expression");
|
||||
DATA(insert OID = 2074 ( substring PGNSP PGUID 14 f f t f i 3 25 "25 25 25" "select substring($1, similar_escape($2, $3))" - _null_ ));
|
||||
DESCR("extracts text matching SQL99 regular expression");
|
||||
|
||||
DATA(insert OID = 2075 ( bit PGNSP PGUID 12 f f t f i 1 1560 "20" bitfromint8 - _null_ ));
|
||||
DESCR("int8 to bitstring");
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: builtins.h,v 1.201 2002/09/19 22:48:34 tgl Exp $
|
||||
* $Id: builtins.h,v 1.202 2002/09/22 17:27:25 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -371,6 +371,7 @@ extern Datum nameicregexne(PG_FUNCTION_ARGS);
|
||||
extern Datum texticregexeq(PG_FUNCTION_ARGS);
|
||||
extern Datum texticregexne(PG_FUNCTION_ARGS);
|
||||
extern Datum textregexsubstr(PG_FUNCTION_ARGS);
|
||||
extern Datum similar_escape(PG_FUNCTION_ARGS);
|
||||
|
||||
/* regproc.c */
|
||||
extern Datum regprocin(PG_FUNCTION_ARGS);
|
||||
|
@ -142,15 +142,15 @@ SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS "456";
|
||||
t
|
||||
(1 row)
|
||||
|
||||
-- T581 regular expression substring
|
||||
SELECT SUBSTRING('abcdefg' FROM '(b|f).*(d)' FOR '#') AS "bcd";
|
||||
-- T581 regular expression substring (with SQL99's bizarre regexp syntax)
|
||||
SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd";
|
||||
bcd
|
||||
-----
|
||||
bcd
|
||||
(1 row)
|
||||
|
||||
-- No match should return NULL
|
||||
SELECT SUBSTRING('abcdefg' FROM '(1|2|3)' FOR '#') IS NULL AS "True";
|
||||
SELECT SUBSTRING('abcdefg' FROM '#"(b_d)#"%' FOR '#') IS NULL AS "True";
|
||||
True
|
||||
------
|
||||
t
|
||||
@ -175,8 +175,16 @@ SELECT SUBSTRING('abcdefg' FROM NULL FOR '#') IS NULL AS "True";
|
||||
t
|
||||
(1 row)
|
||||
|
||||
-- PostgreSQL extention to allow omitting the escape character
|
||||
SELECT SUBSTRING('abcdefg' FROM '(c|d).e') AS "cde";
|
||||
-- PostgreSQL extension to allow omitting the escape character;
|
||||
-- here the regexp is taken as Posix syntax
|
||||
SELECT SUBSTRING('abcdefg' FROM 'c.e') AS "cde";
|
||||
cde
|
||||
-----
|
||||
cde
|
||||
(1 row)
|
||||
|
||||
-- With a parenthesized subexpression, return only what matches the subexpr
|
||||
SELECT SUBSTRING('abcdefg' FROM 'b(.*)f') AS "cde";
|
||||
cde
|
||||
-----
|
||||
cde
|
||||
|
@ -62,19 +62,24 @@ SELECT SUBSTRING('1234567890' FROM 3) = '34567890' AS "34567890";
|
||||
|
||||
SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS "456";
|
||||
|
||||
-- T581 regular expression substring
|
||||
SELECT SUBSTRING('abcdefg' FROM '(b|f).*(d)' FOR '#') AS "bcd";
|
||||
-- T581 regular expression substring (with SQL99's bizarre regexp syntax)
|
||||
SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd";
|
||||
|
||||
-- No match should return NULL
|
||||
SELECT SUBSTRING('abcdefg' FROM '(1|2|3)' FOR '#') IS NULL AS "True";
|
||||
SELECT SUBSTRING('abcdefg' FROM '#"(b_d)#"%' FOR '#') IS NULL AS "True";
|
||||
|
||||
-- Null inputs should return NULL
|
||||
SELECT SUBSTRING('abcdefg' FROM '(b|c)' FOR NULL) IS NULL AS "True";
|
||||
SELECT SUBSTRING(NULL FROM '(b|c)' FOR '#') IS NULL AS "True";
|
||||
SELECT SUBSTRING('abcdefg' FROM NULL FOR '#') IS NULL AS "True";
|
||||
|
||||
-- PostgreSQL extention to allow omitting the escape character
|
||||
SELECT SUBSTRING('abcdefg' FROM '(c|d).e') AS "cde";
|
||||
-- PostgreSQL extension to allow omitting the escape character;
|
||||
-- here the regexp is taken as Posix syntax
|
||||
SELECT SUBSTRING('abcdefg' FROM 'c.e') AS "cde";
|
||||
|
||||
-- With a parenthesized subexpression, return only what matches the subexpr
|
||||
SELECT SUBSTRING('abcdefg' FROM 'b(.*)f') AS "cde";
|
||||
|
||||
|
||||
-- E021-11 position expression
|
||||
SELECT POSITION('4' IN '1234567890') = '4' AS "4";
|
||||
|
Loading…
Reference in New Issue
Block a user