Bring SIMILAR TO and SUBSTRING into some semblance of conformance with

the SQL99 standard. (I'm not sure that the character-class features are quite right, but that can be fixed later.) Document SQL99 and POSIX regexps as being different features; provide variants of SUBSTRING for each.
2024-12-21 08:29:39 +08:00 · 2002-09-22 17:27:25 +00:00 · 2002-09-22 17:27:25 +00:00 · 9946b83ded
commit 9946b83ded
parent ac355d558e
7 changed files with 683 additions and 381 deletions
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@ -1,5 +1,5 @@
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/func.sgml,v 1.124 2002/09/21 18:32:53 petere Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/func.sgml,v 1.125 2002/09/22 17:27:23 tgl Exp $
 PostgreSQL documentation
 -->

@ -921,18 +921,31 @@ PostgreSQL documentation
      </row>

      <row>
-       <entry><function>substring</function>(<parameter>string</parameter> <optional>from <replaceable>pattern</replaceable></optional> <optional>for <replaceable>escape</replaceable></optional>)</entry>
+       <entry><function>substring</function>(<parameter>string</parameter> from <replaceable>pattern</replaceable>)</entry>
       <entry><type>text</type></entry>
       <entry>
-        extract regular expression
+        extract substring matching POSIX regular expression
        <indexterm>
         <primary>substring</primary>
        </indexterm>
       </entry>
-       <entry><literal>substring('Thomas' from 'mas$' for <optional>escape '\\'</optional>)</literal></entry>
+       <entry><literal>substring('Thomas' from '...$')</literal></entry>
       <entry><literal>mas</literal></entry>
      </row>

+      <row>
+       <entry><function>substring</function>(<parameter>string</parameter> from <replaceable>pattern</replaceable> for <replaceable>escape</replaceable>)</entry>
+       <entry><type>text</type></entry>
+       <entry>
+        extract substring matching SQL99 regular expression
+        <indexterm>
+         <primary>substring</primary>
+        </indexterm>
+       </entry>
+       <entry><literal>substring('Thomas' from '%#"o_a#"_' for '#')</literal></entry>
+       <entry><literal>oma</literal></entry>
+      </row>
+
      <row>
       <entry>
        <function>trim</function>(<optional>leading | trailing | both</optional>
@ -960,6 +973,328 @@ PostgreSQL documentation
    </tgroup>
   </table>

+   <para>
+    Additional string manipulation functions are available and are
+    listed below.  Some of them are used internally to implement the
+    <acronym>SQL</acronym>-standard string functions listed above.
+   </para>
+
+   <table id="functions-string-other">
+    <title>Other String Functions</title>
+    <tgroup cols="5">
+     <thead>
+      <row>
+       <entry>Function</entry>
+       <entry>Return Type</entry>
+       <entry>Description</entry>
+       <entry>Example</entry>
+       <entry>Result</entry>
+      </row>
+     </thead>
+
+     <tbody>
+      <row>
+       <entry><function>ascii</function>(<type>text</type>)</entry>
+       <entry>integer</entry>
+       <entry>Returns the <acronym>ASCII</acronym> code of the first character of the argument.</entry>
+       <entry><literal>ascii('x')</literal></entry>
+       <entry><literal>120</literal></entry>
+      </row>
+
+      <row>
+       <entry><function>btrim</function>(<parameter>string</parameter> <type>text</type>, <parameter>trim</parameter> <type>text</type>)</entry>
+       <entry><type>text</type></entry>
+       <entry>
+        Remove (trim) the longest string consisting only of characters
+        in <parameter>trim</parameter> from the start and end of
+        <parameter>string</parameter>.
+       </entry>
+       <entry><literal>btrim('xyxtrimyyx','xy')</literal></entry>
+       <entry><literal>trim</literal></entry>
+      </row>
+
+      <row>
+       <entry><function>chr</function>(<type>integer</type>)</entry>
+       <entry><type>text</type></entry>
+       <entry>Returns the character with the given <acronym>ASCII</acronym> code.</entry>
+       <entry><literal>chr(65)</literal></entry>
+       <entry><literal>A</literal></entry>
+      </row>
+
+      <row>
+       <entry>
+        <function>convert</function>(<parameter>string</parameter>
+        <type>text</type>,
+        <optional><parameter>src_encoding</parameter> <type>name</type>,</optional>
+        <parameter>dest_encoding</parameter> <type>name</type>)
+       </entry>
+       <entry><type>text</type></entry>
+       <entry>
+        Converts string using <parameter>dest_encoding</parameter>.
+        The original encoding is specified by
+        <parameter>src_encoding</parameter>.  If
+        <parameter>src_encoding</parameter> is omitted, database
+        encoding is assumed.
+       </entry>
+       <entry><literal>convert('text_in_unicode', 'UNICODE', 'LATIN1')</literal></entry>
+       <entry><literal>text_in_unicode</literal> represented in ISO 8859-1</entry>
+      </row>
+
+      <row>
+       <entry>
+        <function>decode</function>(<parameter>string</parameter> <type>text</type>,
+        <parameter>type</parameter> <type>text</type>)
+       </entry>
+       <entry><type>bytea</type></entry>
+       <entry>
+        Decodes binary data from <parameter>string</parameter> previously 
+        encoded with encode().  Parameter type is same as in encode().
+       </entry>
+       <entry><literal>decode('MTIzAAE=', 'base64')</literal></entry>
+       <entry><literal>123\000\001</literal></entry>
+      </row>       
+
+      <row>
+       <entry>
+        <function>encode</function>(<parameter>data</parameter> <type>bytea</type>,
+        <parameter>type</parameter> <type>text</type>)
+       </entry>
+       <entry><type>text</type></entry>
+       <entry>
+        Encodes binary data to <acronym>ASCII</acronym>-only representation.  Supported
+        types are: 'base64', 'hex', 'escape'.
+       </entry>
+       <entry><literal>encode('123\\000\\001', 'base64')</literal></entry>
+       <entry><literal>MTIzAAE=</literal></entry>
+      </row>       
+
+      <row>
+       <entry><function>initcap</function>(<type>text</type>)</entry>
+       <entry><type>text</type></entry>
+       <entry>Converts first letter of each word (whitespace separated) to upper case.</entry>
+       <entry><literal>initcap('hi thomas')</literal></entry>
+       <entry><literal>Hi Thomas</literal></entry>
+      </row>
+
+      <row>
+       <entry><function>length</function>(<parameter>string</parameter>)</entry>
+       <entry><type>integer</type></entry>
+       <entry>
+        length of string
+        <indexterm>
+         <primary>character strings</primary>
+         <secondary>length</secondary>
+        </indexterm>
+        <indexterm>
+         <primary>length</primary>
+         <secondary>character strings</secondary>
+         <see>character strings, length</see>
+        </indexterm>
+       </entry>
+       <entry><literal>length('jose')</literal></entry>
+       <entry><literal>4</literal></entry>
+      </row>
+
+      <row>
+       <entry>
+        <function>lpad</function>(<parameter>string</parameter> <type>text</type>,
+        <parameter>length</parameter> <type>integer</type>
+        <optional>, <parameter>fill</parameter> <type>text</type></optional>)
+       </entry>
+       <entry>text</entry>
+       <entry>
+        Fills up the <parameter>string</parameter> to length
+        <parameter>length</parameter> by prepending the characters
+        <parameter>fill</parameter> (a space by default).  If the
+        <parameter>string</parameter> is already longer than
+        <parameter>length</parameter> then it is truncated (on the
+        right).
+       </entry>
+       <entry><literal>lpad('hi', 5, 'xy')</literal></entry>
+       <entry><literal>xyxhi</literal></entry>
+      </row>
+
+      <row>
+       <entry><function>ltrim</function>(<parameter>string</parameter> <type>text</type>, <parameter>text</parameter> <type>text</type>)</entry>
+       <entry><type>text</type></entry>
+       <entry>
+        Removes the longest string containing only characters from
+        <parameter>trim</parameter> from the start of the string.
+       </entry>
+       <entry><literal>ltrim('zzzytrim','xyz')</literal></entry>
+       <entry><literal>trim</literal></entry>
+      </row>
+
+      <row>
+       <entry><function>pg_client_encoding</function>()</entry>
+       <entry><type>name</type></entry>
+       <entry>
+        Returns current client encoding name.
+       </entry>
+       <entry><literal>pg_client_encoding()</literal></entry>
+       <entry><literal>SQL_ASCII</literal></entry>
+      </row>
+
+      <row>
+       <entry><function>quote_ident</function>(<parameter>string</parameter> text)</entry>
+       <entry><type>text</type></entry>
+       <entry>
+        Returns the given string suitably quoted to be used as an identifier
+	in an SQL query string.
+	Quotes are added only if necessary (i.e., if the string contains
+	non-identifier characters or would be case-folded).
+	Embedded quotes are properly doubled.
+       </entry>
+       <entry><literal>quote_ident('Foo')</literal></entry>
+       <entry><literal>"Foo"</literal></entry>
+      </row>
+
+      <row>
+       <entry><function>quote_literal</function>(<parameter>string</parameter> text)</entry>
+       <entry><type>text</type></entry>
+       <entry>
+        Returns the given string suitably quoted to be used as a literal
+	in an SQL query string.
+	Embedded quotes and backslashes are properly doubled.
+       </entry>
+       <entry><literal>quote_literal('O\'Reilly')</literal></entry>
+       <entry><literal>'O''Reilly'</literal></entry>
+      </row>
+
+      <row>
+       <entry><function>repeat</function>(<type>text</type>, <type>integer</type>)</entry>
+       <entry><type>text</type></entry>
+       <entry>Repeat text a number of times.</entry>
+       <entry><literal>repeat('Pg', 4)</literal></entry>
+       <entry><literal>PgPgPgPg</literal></entry>
+      </row>
+
+      <row>
+       <entry><function>replace</function>(<parameter>string</parameter> <type>text</type>,
+       <parameter>from</parameter> <type>text</type>,
+       <parameter>to</parameter> <type>text</type>)</entry>
+       <entry><type>text</type></entry>
+       <entry>Replace all occurrences in <parameter>string</parameter> of substring
+        <parameter>from</parameter> with substring <parameter>to</parameter>
+       </entry>
+       <entry><literal>replace('abcdefabcdef', 'cd', 'XX')</literal></entry>
+       <entry><literal>abXXefabXXef</literal></entry>
+      </row>
+
+      <row>
+       <entry>
+        <function>rpad</function>(<parameter>string</parameter> <type>text</type>,
+        <parameter>length</parameter> <type>integer</type>
+        <optional>, <parameter>fill</parameter> <type>text</type></optional>)
+       </entry>
+       <entry><type>text</type></entry>
+       <entry>
+        Fills up the <parameter>string</parameter> to length
+        <parameter>length</parameter> by appending the characters
+        <parameter>fill</parameter> (a space by default).  If the
+        <parameter>string</parameter> is already longer than
+        <parameter>length</parameter> then it is truncated.
+       </entry>
+       <entry><literal>rpad('hi', 5, 'xy')</literal></entry>
+       <entry><literal>hixyx</literal></entry>
+      </row>
+
+      <row>
+       <entry><function>rtrim</function>(<parameter>string</parameter>
+        text, <parameter>trim</parameter> text)</entry>
+       <entry><type>text</type></entry>
+       <entry>
+        Removes the longest string containing only characters from
+        <parameter>trim</parameter> from the end of the string.
+       </entry>
+       <entry><literal>rtrim('trimxxxx','x')</literal></entry>
+       <entry><literal>trim</literal></entry>
+      </row>
+
+      <row>
+       <entry><function>split_part</function>(<parameter>string</parameter> <type>text</type>,
+       <parameter>delimiter</parameter> <type>text</type>,
+       <parameter>column</parameter> <type>integer</type>)</entry>
+       <entry><type>text</type></entry>
+       <entry>Split <parameter>string</parameter> on <parameter>delimiter</parameter>
+        returning the resulting (one based) <parameter>column</parameter> number.
+       </entry>
+       <entry><literal>split_part('abc~@~def~@~ghi','~@~',2)</literal></entry>
+       <entry><literal>def</literal></entry>
+      </row>
+
+      <row>
+       <entry><function>strpos</function>(<parameter>string</parameter>, <parameter>substring</parameter>)</entry>
+       <entry><type>text</type></entry>
+       <entry>
+        Locates specified substring. (same as
+        <literal>position(<parameter>substring</parameter> in
+         <parameter>string</parameter>)</literal>, but note the reversed
+        argument order)
+       </entry>
+       <entry><literal>strpos('high','ig')</literal></entry>
+       <entry><literal>2</literal></entry>
+      </row>
+
+      <row>
+       <entry><function>substr</function>(<parameter>string</parameter>, <parameter>from</parameter> <optional>, <parameter>count</parameter></optional>)</entry>
+       <entry><type>text</type></entry>
+       <entry>
+        Extracts specified substring. (same as
+        <literal>substring(<parameter>string</parameter> from <parameter>from</parameter> for <parameter>count</parameter>)</literal>)
+       </entry>
+       <entry><literal>substr('alphabet', 3, 2)</literal></entry>
+       <entry><literal>ph</literal></entry>
+      </row>
+
+      <row>
+       <entry><function>to_ascii</function>(<type>text</type>
+        <optional>, <parameter>encoding</parameter></optional>)</entry>
+       <entry><type>text</type></entry>
+       <entry>Converts text from multibyte encoding to <acronym>ASCII</acronym>.</entry>
+       <entry><literal>to_ascii('Karel')</literal></entry>
+       <entry><literal>Karel</literal></entry>
+      </row>
+
+      <row>
+       <entry><function>to_hex</function>(<parameter>number</parameter> <type>integer</type>
+       or <type>bigint</type>)</entry>
+       <entry><type>text</type></entry>
+       <entry>Convert <parameter>number</parameter> to its equivalent hexadecimal
+        representation.
+       </entry>
+       <entry><literal>to_hex(9223372036854775807::bigint)</literal></entry>
+       <entry><literal>7fffffffffffffff</literal></entry>
+      </row>
+
+      <row>
+       <entry>
+        <function>translate</function>(<parameter>string</parameter>
+        <type>text</type>,
+        <parameter>from</parameter> <type>text</type>,
+        <parameter>to</parameter> <type>text</type>)
+       </entry>
+       <entry><type>text</type></entry>
+       <entry>
+        Any character in <parameter>string</parameter> that matches a
+        character in the <parameter>from</parameter> set is replaced by
+        the corresponding character in the <parameter>to</parameter>
+        set.
+       </entry>
+       <entry><literal>translate('12345', '14', 'ax')</literal></entry>
+       <entry><literal>a23x5</literal></entry>
+      </row>       
+      
+     </tbody>
+    </tgroup>
+   </table>
+
+   <para>
+    The <function>to_ascii</function> function supports conversion from
+    LATIN1, LATIN2, WIN1250 (CP1250) only.
+   </para>
+
+
   <table id="conversion-names">
    <title>Available conversion names</title>
    <tgroup cols="3">
@ -1660,326 +1995,6 @@ PostgreSQL documentation
    </tgroup>
   </table>

-   <para>
-    Additional string manipulation functions are available and are
-    listed below.  Some of them are used internally to implement the
-    <acronym>SQL</acronym>-standard string functions listed above.
-   </para>
-
-   <table id="functions-string-other">
-    <title>Other String Functions</title>
-    <tgroup cols="5">
-     <thead>
-      <row>
-       <entry>Function</entry>
-       <entry>Return Type</entry>
-       <entry>Description</entry>
-       <entry>Example</entry>
-       <entry>Result</entry>
-      </row>
-     </thead>
-
-     <tbody>
-      <row>
-       <entry><function>ascii</function>(<type>text</type>)</entry>
-       <entry>integer</entry>
-       <entry>Returns the <acronym>ASCII</acronym> code of the first character of the argument.</entry>
-       <entry><literal>ascii('x')</literal></entry>
-       <entry><literal>120</literal></entry>
-      </row>
-
-      <row>
-       <entry><function>btrim</function>(<parameter>string</parameter> <type>text</type>, <parameter>trim</parameter> <type>text</type>)</entry>
-       <entry><type>text</type></entry>
-       <entry>
-        Remove (trim) the longest string consisting only of characters
-        in <parameter>trim</parameter> from the start and end of
-        <parameter>string</parameter>.
-       </entry>
-       <entry><literal>btrim('xyxtrimyyx','xy')</literal></entry>
-       <entry><literal>trim</literal></entry>
-      </row>
-
-      <row>
-       <entry><function>chr</function>(<type>integer</type>)</entry>
-       <entry><type>text</type></entry>
-       <entry>Returns the character with the given <acronym>ASCII</acronym> code.</entry>
-       <entry><literal>chr(65)</literal></entry>
-       <entry><literal>A</literal></entry>
-      </row>
-
-      <row>
-       <entry>
-        <function>convert</function>(<parameter>string</parameter>
-        <type>text</type>,
-        <optional><parameter>src_encoding</parameter> <type>name</type>,</optional>
-        <parameter>dest_encoding</parameter> <type>name</type>)
-       </entry>
-       <entry><type>text</type></entry>
-       <entry>
-        Converts string using <parameter>dest_encoding</parameter>.
-        The original encoding is specified by
-        <parameter>src_encoding</parameter>.  If
-        <parameter>src_encoding</parameter> is omitted, database
-        encoding is assumed.
-       </entry>
-       <entry><literal>convert('text_in_unicode', 'UNICODE', 'LATIN1')</literal></entry>
-       <entry><literal>text_in_unicode</literal> represented in ISO 8859-1</entry>
-      </row>
-
-      <row>
-       <entry>
-        <function>decode</function>(<parameter>string</parameter> <type>text</type>,
-        <parameter>type</parameter> <type>text</type>)
-       </entry>
-       <entry><type>bytea</type></entry>
-       <entry>
-        Decodes binary data from <parameter>string</parameter> previously 
-        encoded with encode().  Parameter type is same as in encode().
-       </entry>
-       <entry><literal>decode('MTIzAAE=', 'base64')</literal></entry>
-       <entry><literal>123\000\001</literal></entry>
-      </row>       
-
-      <row>
-       <entry>
-        <function>encode</function>(<parameter>data</parameter> <type>bytea</type>,
-        <parameter>type</parameter> <type>text</type>)
-       </entry>
-       <entry><type>text</type></entry>
-       <entry>
-        Encodes binary data to <acronym>ASCII</acronym>-only representation.  Supported
-        types are: 'base64', 'hex', 'escape'.
-       </entry>
-       <entry><literal>encode('123\\000\\001', 'base64')</literal></entry>
-       <entry><literal>MTIzAAE=</literal></entry>
-      </row>       
-
-      <row>
-       <entry><function>initcap</function>(<type>text</type>)</entry>
-       <entry><type>text</type></entry>
-       <entry>Converts first letter of each word (whitespace separated) to upper case.</entry>
-       <entry><literal>initcap('hi thomas')</literal></entry>
-       <entry><literal>Hi Thomas</literal></entry>
-      </row>
-
-      <row>
-       <entry><function>length</function>(<parameter>string</parameter>)</entry>
-       <entry><type>integer</type></entry>
-       <entry>
-        length of string
-        <indexterm>
-         <primary>character strings</primary>
-         <secondary>length</secondary>
-        </indexterm>
-        <indexterm>
-         <primary>length</primary>
-         <secondary>character strings</secondary>
-         <see>character strings, length</see>
-        </indexterm>
-       </entry>
-       <entry><literal>length('jose')</literal></entry>
-       <entry><literal>4</literal></entry>
-      </row>
-
-      <row>
-       <entry>
-        <function>lpad</function>(<parameter>string</parameter> <type>text</type>,
-        <parameter>length</parameter> <type>integer</type>
-        <optional>, <parameter>fill</parameter> <type>text</type></optional>)
-       </entry>
-       <entry>text</entry>
-       <entry>
-        Fills up the <parameter>string</parameter> to length
-        <parameter>length</parameter> by prepending the characters
-        <parameter>fill</parameter> (a space by default).  If the
-        <parameter>string</parameter> is already longer than
-        <parameter>length</parameter> then it is truncated (on the
-        right).
-       </entry>
-       <entry><literal>lpad('hi', 5, 'xy')</literal></entry>
-       <entry><literal>xyxhi</literal></entry>
-      </row>
-
-      <row>
-       <entry><function>ltrim</function>(<parameter>string</parameter> <type>text</type>, <parameter>text</parameter> <type>text</type>)</entry>
-       <entry><type>text</type></entry>
-       <entry>
-        Removes the longest string containing only characters from
-        <parameter>trim</parameter> from the start of the string.
-       </entry>
-       <entry><literal>ltrim('zzzytrim','xyz')</literal></entry>
-       <entry><literal>trim</literal></entry>
-      </row>
-
-      <row>
-       <entry><function>pg_client_encoding</function>()</entry>
-       <entry><type>name</type></entry>
-       <entry>
-        Returns current client encoding name.
-       </entry>
-       <entry><literal>pg_client_encoding()</literal></entry>
-       <entry><literal>SQL_ASCII</literal></entry>
-      </row>
-
-      <row>
-       <entry><function>quote_ident</function>(<parameter>string</parameter> text)</entry>
-       <entry><type>text</type></entry>
-       <entry>
-        Returns the given string suitably quoted to be used as an identifier
-	in an SQL query string.
-	Quotes are added only if necessary (i.e., if the string contains
-	non-identifier characters or would be case-folded).
-	Embedded quotes are properly doubled.
-       </entry>
-       <entry><literal>quote_ident('Foo')</literal></entry>
-       <entry><literal>"Foo"</literal></entry>
-      </row>
-
-      <row>
-       <entry><function>quote_literal</function>(<parameter>string</parameter> text)</entry>
-       <entry><type>text</type></entry>
-       <entry>
-        Returns the given string suitably quoted to be used as a literal
-	in an SQL query string.
-	Embedded quotes and backslashes are properly doubled.
-       </entry>
-       <entry><literal>quote_literal('O\'Reilly')</literal></entry>
-       <entry><literal>'O''Reilly'</literal></entry>
-      </row>
-
-      <row>
-       <entry><function>repeat</function>(<type>text</type>, <type>integer</type>)</entry>
-       <entry><type>text</type></entry>
-       <entry>Repeat text a number of times.</entry>
-       <entry><literal>repeat('Pg', 4)</literal></entry>
-       <entry><literal>PgPgPgPg</literal></entry>
-      </row>
-
-      <row>
-       <entry><function>replace</function>(<parameter>string</parameter> <type>text</type>,
-       <parameter>from</parameter> <type>text</type>,
-       <parameter>to</parameter> <type>text</type>)</entry>
-       <entry><type>text</type></entry>
-       <entry>Replace all occurrences in <parameter>string</parameter> of substring
-        <parameter>from</parameter> with substring <parameter>to</parameter>
-       </entry>
-       <entry><literal>replace('abcdefabcdef', 'cd', 'XX')</literal></entry>
-       <entry><literal>abXXefabXXef</literal></entry>
-      </row>
-
-      <row>
-       <entry>
-        <function>rpad</function>(<parameter>string</parameter> <type>text</type>,
-        <parameter>length</parameter> <type>integer</type>
-        <optional>, <parameter>fill</parameter> <type>text</type></optional>)
-       </entry>
-       <entry><type>text</type></entry>
-       <entry>
-        Fills up the <parameter>string</parameter> to length
-        <parameter>length</parameter> by appending the characters
-        <parameter>fill</parameter> (a space by default).  If the
-        <parameter>string</parameter> is already longer than
-        <parameter>length</parameter> then it is truncated.
-       </entry>
-       <entry><literal>rpad('hi', 5, 'xy')</literal></entry>
-       <entry><literal>hixyx</literal></entry>
-      </row>
-
-      <row>
-       <entry><function>rtrim</function>(<parameter>string</parameter>
-        text, <parameter>trim</parameter> text)</entry>
-       <entry><type>text</type></entry>
-       <entry>
-        Removes the longest string containing only characters from
-        <parameter>trim</parameter> from the end of the string.
-       </entry>
-       <entry><literal>rtrim('trimxxxx','x')</literal></entry>
-       <entry><literal>trim</literal></entry>
-      </row>
-
-      <row>
-       <entry><function>split_part</function>(<parameter>string</parameter> <type>text</type>,
-       <parameter>delimiter</parameter> <type>text</type>,
-       <parameter>column</parameter> <type>integer</type>)</entry>
-       <entry><type>text</type></entry>
-       <entry>Split <parameter>string</parameter> on <parameter>delimiter</parameter>
-        returning the resulting (one based) <parameter>column</parameter> number.
-       </entry>
-       <entry><literal>split_part('abc~@~def~@~ghi','~@~',2)</literal></entry>
-       <entry><literal>def</literal></entry>
-      </row>
-
-      <row>
-       <entry><function>strpos</function>(<parameter>string</parameter>, <parameter>substring</parameter>)</entry>
-       <entry><type>text</type></entry>
-       <entry>
-        Locates specified substring. (same as
-        <literal>position(<parameter>substring</parameter> in
-         <parameter>string</parameter>)</literal>, but note the reversed
-        argument order)
-       </entry>
-       <entry><literal>strpos('high','ig')</literal></entry>
-       <entry><literal>2</literal></entry>
-      </row>
-
-      <row>
-       <entry><function>substr</function>(<parameter>string</parameter>, <parameter>from</parameter> <optional>, <parameter>count</parameter></optional>)</entry>
-       <entry><type>text</type></entry>
-       <entry>
-        Extracts specified substring. (same as
-        <literal>substring(<parameter>string</parameter> from <parameter>from</parameter> for <parameter>count</parameter>)</literal>)
-       </entry>
-       <entry><literal>substr('alphabet', 3, 2)</literal></entry>
-       <entry><literal>ph</literal></entry>
-      </row>
-
-      <row>
-       <entry><function>to_ascii</function>(<type>text</type>
-        <optional>, <parameter>encoding</parameter></optional>)</entry>
-       <entry><type>text</type></entry>
-       <entry>Converts text from multibyte encoding to <acronym>ASCII</acronym>.</entry>
-       <entry><literal>to_ascii('Karel')</literal></entry>
-       <entry><literal>Karel</literal></entry>
-      </row>
-
-      <row>
-       <entry><function>to_hex</function>(<parameter>number</parameter> <type>integer</type>
-       or <type>bigint</type>)</entry>
-       <entry><type>text</type></entry>
-       <entry>Convert <parameter>number</parameter> to its equivalent hexadecimal
-        representation.
-       </entry>
-       <entry><literal>to_hex(9223372036854775807::bigint)</literal></entry>
-       <entry><literal>7fffffffffffffff</literal></entry>
-      </row>
-
-      <row>
-       <entry>
-        <function>translate</function>(<parameter>string</parameter>
-        <type>text</type>,
-        <parameter>from</parameter> <type>text</type>,
-        <parameter>to</parameter> <type>text</type>)
-       </entry>
-       <entry><type>text</type></entry>
-       <entry>
-        Any character in <parameter>string</parameter> that matches a
-        character in the <parameter>from</parameter> set is replaced by
-        the corresponding character in the <parameter>to</parameter>
-        set.
-       </entry>
-       <entry><literal>translate('12345', '14', 'ax')</literal></entry>
-       <entry><literal>a23x5</literal></entry>
-      </row>       
-      
-     </tbody>
-    </tgroup>
-   </table>
-
-   <para>
-    The <function>to_ascii</function> function supports conversion from
-    LATIN1, LATIN2, WIN1250 (CP1250) only.
-   </para>
  </sect1>


@ -2171,16 +2186,16 @@ PostgreSQL documentation
  <title>Pattern Matching</title>

   <para>
-    There are two separate approaches to pattern matching provided by
+    There are three separate approaches to pattern matching provided by
    <productname>PostgreSQL</productname>:  the traditional
    <acronym>SQL</acronym> 
-    <function>LIKE</function> operator and the more recent
+    <function>LIKE</function> operator, the more recent
    <acronym>SQL99</acronym> 
-    <function>SIMILAR TO</function> operator implementing
+    <function>SIMILAR TO</function> operator, and
    <acronym>POSIX</acronym>-style regular expressions.
    Additionally, a pattern matching function,
-    <function>SUBSTRING</function>, is available, as defined in
-    <acronym>SQL99</acronym>.
+    <function>SUBSTRING</function>, is available, using either
+    <acronym>SQL99</acronym>-style or POSIX-style regular expressions.
   </para>

   <tip>
@ -2190,13 +2205,6 @@ PostgreSQL documentation
    </para>
   </tip>

-   <para>
-    Both <function>LIKE</function> and <function>SIMILAR TO</function>
-    are SQL-standard operators which are also available in alternate
-    forms as <productname>PostgreSQL</productname> operators; look at
-    <literal>~</literal> and <literal>~~</literal> for examples.
-   </para>
-
  <sect2 id="functions-like">
   <title><function>LIKE</function></title>

@ -2296,11 +2304,142 @@ PostgreSQL documentation
  </sect2>


-  <sect2 id="functions-regexp">
-   <title><function>SIMILAR TO</function> and <acronym>POSIX</acronym>
+  <sect2 id="functions-sql99-regexp">
+   <title><function>SIMILAR TO</function> and <acronym>SQL99</acronym>
     Regular Expressions</title>

-   <indexterm zone="functions-regexp">
+   <indexterm zone="functions-sql99-regexp">
+    <primary>regular expressions</primary>
+    <seealso>pattern matching</seealso>
+   </indexterm>
+
+   <indexterm>
+    <primary>similar to</primary>
+   </indexterm>
+
+   <indexterm>
+    <primary>substring</primary>
+   </indexterm>
+
+    <synopsis>
+<replaceable>string</replaceable> SIMILAR TO <replaceable>pattern</replaceable> <optional>ESCAPE <replaceable>escape-character</replaceable></optional>
+<replaceable>string</replaceable> NOT SIMILAR TO <replaceable>pattern</replaceable> <optional>ESCAPE <replaceable>escape-character</replaceable></optional>
+    </synopsis>
+
+    <para>
+     The <function>SIMILAR TO</function> operator returns true or false
+     depending on whether its pattern matches the given string.  It is
+     much like <function>LIKE</function>, except that it interprets the
+     pattern using <acronym>SQL99</acronym>'s definition of a regular
+     expression.
+     <acronym>SQL99</acronym>'s regular expressions are a curious cross
+     between <function>LIKE</function> notation and common regular expression
+     notation.
+    </para>
+
+    <para>
+     Like <function>LIKE</function>, the  <function>SIMILAR TO</function>
+     operator succeeds only if its pattern matches the entire string;
+     this is unlike common regular expression practice, wherein the pattern
+     may match any part of the string.
+     Also like
+     <function>LIKE</function>, <function>SIMILAR TO</function> uses
+     <literal>%</> and <literal>_</> as wildcard characters denoting
+     any string and any single character, respectively (these are
+     comparable to <literal>.*</> and <literal>.</> in POSIX regular
+     expressions).
+    </para>
+
+    <para>
+     In addition to these facilities borrowed from <function>LIKE</function>,
+     <function>SIMILAR TO</function> supports these pattern-matching
+     metacharacters borrowed from POSIX regular expressions:
+
+    <itemizedlist>
+     <listitem>
+      <para>
+       <literal>|</literal> denotes alternation (either of two alternatives).
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       <literal>*</literal> denotes repetition of the previous item zero
+       or more times.
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       <literal>+</literal> denotes repetition of the previous item one
+       or more times.
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       Parentheses <literal>()</literal> may be used to group items into
+       a single logical item.
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       A bracket expression <literal>[...]</literal> specifies a character
+       class, just as in POSIX regular expressions.
+      </para>
+     </listitem>
+    </itemizedlist>
+
+     Notice that bounded repetition (<literal>?</> and <literal>{...}</>)
+     are not provided, though they exist in POSIX.  Also, dot (<literal>.</>)
+     is not a metacharacter.
+    </para>
+
+    <para>
+     As with <function>LIKE</>, a backslash disables the special meaning
+     of any of these metacharacters; or a different escape character can
+     be specified with <literal>ESCAPE</>.
+    </para>
+
+    <informalexample>
+     <para>
+      Some examples:
+      <programlisting>
+'abc' SIMILAR TO 'abc'      <lineannotation>true</lineannotation>
+'abc' SIMILAR TO 'a'        <lineannotation>false</lineannotation>
+'abc' SIMILAR TO '%(b|d)%'  <lineannotation>true</lineannotation>
+'abc' SIMILAR TO '(b|c)%'   <lineannotation>false</lineannotation>
+      </programlisting>
+     </para>
+    </informalexample>
+
+    <para>
+     The SUBSTRING function with three parameters,
+     <function>SUBSTRING</function>(<parameter>string</parameter> FROM
+     <replaceable>pattern</replaceable> FOR
+     <replaceable>escape</replaceable>), provides extraction of a substring
+     that matches a SQL99 regular expression pattern.  As with SIMILAR TO,
+     the specified pattern must match to the entire data string, else the
+     function fails and returns NULL.  To indicate the part of the pattern
+     that should be returned on success, SQL99 specifies that the pattern
+     must contain two occurrences of the escape character followed by
+     double quote (<literal>"</>).  The text matching the portion of the
+     pattern between these markers is returned.
+    </para>
+
+    <informalexample>
+     <para>
+      Some examples:
+      <programlisting>
+SUBSTRING('foobar' FROM '%#"o_b#"%' FOR '#')   <lineannotation>oob</lineannotation>
+SUBSTRING('foobar' FROM '#"o_b#"%' FOR '#')    <lineannotation>NULL</lineannotation>
+      </programlisting>
+     </para>
+    </informalexample>
+
+  </sect2>
+
+  <sect2 id="functions-posix-regexp">
+   <title><acronym>POSIX</acronym> Regular Expressions</title>
+
+   <indexterm zone="functions-posix-regexp">
    <primary>regular expressions</primary>
    <seealso>pattern matching</seealso>
   </indexterm>
@ -2341,12 +2480,6 @@ PostgreSQL documentation
        <entry>Does not match regular expression, case insensitive</entry>
        <entry><literal>'thomas' !~* '.*vadim.*'</literal></entry>
       </row>
-
-       <row>
-        <entry> <literal>SIMILAR TO</literal> </entry>
-        <entry>Matches regular expression, case sensitive</entry>
-        <entry><literal>'thomas' SIMILAR TO '.*thomas.*'</literal></entry>
-       </row>
      </tbody>
     </tgroup>
    </table>
@ -2354,7 +2487,8 @@ PostgreSQL documentation
    <para>
     <acronym>POSIX</acronym> regular expressions provide a more
     powerful means for 
-     pattern matching than the <function>LIKE</function> function.
+     pattern matching than the <function>LIKE</function> and
+     <function>SIMILAR TO</> operators.
     Many Unix tools such as <command>egrep</command>,
     <command>sed</command>, or <command>awk</command> use a pattern
     matching language that is similar to the one described here.
@ -2379,10 +2513,34 @@ PostgreSQL documentation
     <para>
      Some examples:
      <programlisting>
-'abc' SIMILAR TO 'abc'    <lineannotation>true</lineannotation>
-'abc' SIMILAR TO '^a'     <lineannotation>true</lineannotation>
-'abc' SIMILAR TO '(b|d)'  <lineannotation>true</lineannotation>
-'abc' SIMILAR TO '^(b|c)' <lineannotation>false</lineannotation>
+'abc' ~ 'abc'    <lineannotation>true</lineannotation>
+'abc' ~ '^a'     <lineannotation>true</lineannotation>
+'abc' ~ '(b|d)'  <lineannotation>true</lineannotation>
+'abc' ~ '^(b|c)' <lineannotation>false</lineannotation>
+      </programlisting>
+     </para>
+    </informalexample>
+
+    <para>
+     The SUBSTRING function with two parameters,
+     <function>SUBSTRING</function>(<parameter>string</parameter> FROM
+     <replaceable>pattern</replaceable>), provides extraction of a substring
+     that matches a POSIX regular expression pattern.  It returns NULL if
+     there is no match, otherwise the portion of the text that matched the
+     pattern.  But if the pattern contains any parentheses, the portion
+     of the text that matched the first parenthesized subexpression (the
+     one whose left parenthesis comes first) is
+     returned.  You can always put parentheses around the whole expression
+     if you want to use parentheses within it without triggering this
+     exception.
+    </para>
+
+    <informalexample>
+     <para>
+      Some examples:
+      <programlisting>
+SUBSTRING('foobar' FROM 'o.b')     <lineannotation>oob</lineannotation>
+SUBSTRING('foobar' FROM 'o(.)b')   <lineannotation>o</lineannotation>
      </programlisting>
     </para>
    </informalexample>
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@ -11,7 +11,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.367 2002/09/18 21:35:21 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.368 2002/09/22 17:27:23 tgl Exp $
 *
 * HISTORY
 *	  AUTHOR			DATE			MAJOR EVENT
@ -5644,22 +5644,40 @@ a_expr:		c_expr									{ $$ = $1; }
 				}

 			| a_expr SIMILAR TO a_expr				%prec SIMILAR
-				{ $$ = (Node *) makeSimpleA_Expr(OP, "~", $1, $4); }
+				{
+					A_Const *c = makeNode(A_Const);
+					FuncCall *n = makeNode(FuncCall);
+					c->val.type = T_Null;
+					n->funcname = SystemFuncName("similar_escape");
+					n->args = makeList2($4, (Node *) c);
+					n->agg_star = FALSE;
+					n->agg_distinct = FALSE;
+					$$ = (Node *) makeSimpleA_Expr(OP, "~", $1, (Node *) n);
+				}
 			| a_expr SIMILAR TO a_expr ESCAPE a_expr
 				{
 					FuncCall *n = makeNode(FuncCall);
-					n->funcname = SystemFuncName("like_escape");
+					n->funcname = SystemFuncName("similar_escape");
 					n->args = makeList2($4, $6);
 					n->agg_star = FALSE;
 					n->agg_distinct = FALSE;
 					$$ = (Node *) makeSimpleA_Expr(OP, "~", $1, (Node *) n);
 				}
 			| a_expr NOT SIMILAR TO a_expr			%prec SIMILAR
-				{ $$ = (Node *) makeSimpleA_Expr(OP, "!~", $1, $5); }
+				{
+					A_Const *c = makeNode(A_Const);
+					FuncCall *n = makeNode(FuncCall);
+					c->val.type = T_Null;
+					n->funcname = SystemFuncName("similar_escape");
+					n->args = makeList2($5, (Node *) c);
+					n->agg_star = FALSE;
+					n->agg_distinct = FALSE;
+					$$ = (Node *) makeSimpleA_Expr(OP, "!~", $1, (Node *) n);
+				}
 			| a_expr NOT SIMILAR TO a_expr ESCAPE a_expr
 				{
 					FuncCall *n = makeNode(FuncCall);
-					n->funcname = SystemFuncName("like_escape");
+					n->funcname = SystemFuncName("similar_escape");
 					n->args = makeList2($5, $7);
 					n->agg_star = FALSE;
 					n->agg_distinct = FALSE;
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/regexp.c,v 1.42 2002/09/04 20:31:28 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/regexp.c,v 1.43 2002/09/22 17:27:23 tgl Exp $
 *
 *		Alistair Crooks added the code for the regex caching
 *		agc - cached the regular expressions used - there's a good chance
@ -317,8 +317,7 @@ textregexsubstr(PG_FUNCTION_ARGS)
 	char	   *sterm;
 	int			len;
 	bool		match;
-	int			nmatch = 1;
-	regmatch_t	pmatch;
+	regmatch_t	pmatch[2];

 	/* be sure sterm is null-terminated */
 	len = VARSIZE(s) - VARHDRSZ;
@ -327,21 +326,131 @@ textregexsubstr(PG_FUNCTION_ARGS)
 	sterm[len] = '\0';

 	/*
-	 * We need the match info back from the pattern match to be able to
-	 * actually extract the substring. It seems to be adequate to pass in
-	 * a structure to return only one result.
+	 * We pass two regmatch_t structs to get info about the overall match
+	 * and the match for the first parenthesized subexpression (if any).
+	 * If there is a parenthesized subexpression, we return what it matched;
+	 * else return what the whole regexp matched.
 	 */
-	match = RE_compile_and_execute(p, sterm, REG_EXTENDED, nmatch, &pmatch);
+	match = RE_compile_and_execute(p, sterm, REG_EXTENDED, 2, pmatch);
+
 	pfree(sterm);

 	/* match? then return the substring matching the pattern */
 	if (match)
 	{
+		int		so,
+				eo;
+
+		so = pmatch[1].rm_so;
+		eo = pmatch[1].rm_eo;
+		if (so < 0 || eo < 0)
+		{
+			/* no parenthesized subexpression */
+			so = pmatch[0].rm_so;
+			eo = pmatch[0].rm_eo;
+		}
+
 		return (DirectFunctionCall3(text_substr,
 									PointerGetDatum(s),
-									Int32GetDatum(pmatch.rm_so + 1),
-							Int32GetDatum(pmatch.rm_eo - pmatch.rm_so)));
+									Int32GetDatum(so + 1),
+									Int32GetDatum(eo - so)));
 	}

 	PG_RETURN_NULL();
 }
+
+/* similar_escape()
+ * Convert a SQL99 regexp pattern to POSIX style, so it can be used by
+ * our regexp engine.
+ */
+Datum
+similar_escape(PG_FUNCTION_ARGS)
+{
+	text	   *pat_text;
+	text	   *esc_text;
+	text	   *result;
+	unsigned char *p,
+			   *e,
+			   *r;
+	int			plen,
+				elen;
+	bool		afterescape = false;
+	int			nquotes = 0;
+
+	/* This function is not strict, so must test explicitly */
+	if (PG_ARGISNULL(0))
+		PG_RETURN_NULL();
+	pat_text = PG_GETARG_TEXT_P(0);
+	p = VARDATA(pat_text);
+	plen = (VARSIZE(pat_text) - VARHDRSZ);
+	if (PG_ARGISNULL(1))
+	{
+		/* No ESCAPE clause provided; default to backslash as escape */
+		e = "\\";
+		elen = 1;
+	}
+	else
+	{
+		esc_text = PG_GETARG_TEXT_P(1);
+		e = VARDATA(esc_text);
+		elen = (VARSIZE(esc_text) - VARHDRSZ);
+		if (elen == 0)
+			e = NULL;			/* no escape character */
+		else if (elen != 1)
+			elog(ERROR, "ESCAPE string must be empty or one character");
+	}
+
+	/* We need room for ^, $, and up to 2 output bytes per input byte */
+	result = (text *) palloc(VARHDRSZ + 2 + 2 * plen);
+	r = VARDATA(result);
+
+	*r++ = '^';
+
+	while (plen > 0)
+	{
+		unsigned char pchar = *p;
+
+		if (afterescape)
+		{
+			if (pchar == '"')	/* for SUBSTRING patterns */
+				*r++ = ((nquotes++ % 2) == 0) ? '(' : ')';
+			else
+			{
+				*r++ = '\\';
+				*r++ = pchar;
+			}
+			afterescape = false;
+		}
+		else if (e && pchar == *e)
+		{
+			/* SQL99 escape character; do not send to output */
+			afterescape = true;
+		}
+		else if (pchar == '%')
+		{
+			*r++ = '.';
+			*r++ = '*';
+		}
+		else if (pchar == '_')
+		{
+			*r++ = '.';
+		}
+		else if (pchar == '\\' || pchar == '.' || pchar == '?' ||
+				 pchar == '{')
+		{
+			*r++ = '\\';
+			*r++ = pchar;
+		}
+		else
+		{
+			*r++ = pchar;
+		}
+		p++, plen--;
+	}
+
+	*r++ = '$';
+	
+	VARATT_SIZEP(result) = r - ((unsigned char *) result);
+
+	PG_RETURN_TEXT_P(result);
+}
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $Id: pg_proc.h,v 1.272 2002/09/18 21:35:23 tgl Exp $
+ * $Id: pg_proc.h,v 1.273 2002/09/22 17:27:23 tgl Exp $
 *
 * NOTES
 *	  The script catalog/genbki.sh reads this file and generates .bki
@ -2076,6 +2076,9 @@ DESCR("convert int4 to char");
 DATA(insert OID = 1622 (  repeat			PGNSP PGUID 12 f f t f i 2 25 "25 23"  repeat - _null_ ));
 DESCR("replicate string int4 times");

+DATA(insert OID = 1623 (  similar_escape	PGNSP PGUID 12 f f f f i 2 25 "25 25" similar_escape - _null_ ));
+DESCR("convert SQL99 regexp pattern to POSIX style");
+
 DATA(insert OID = 1624 (  mul_d_interval	PGNSP PGUID 12 f f t f i 2 1186 "701 1186"	mul_d_interval - _null_ ));

 DATA(insert OID = 1633 (  texticlike		PGNSP PGUID 12 f f t f i 2 16 "25 25" texticlike - _null_ ));
@ -2087,7 +2090,7 @@ DESCR("matches LIKE expression, case-insensitive");
 DATA(insert OID = 1636 (  nameicnlike		PGNSP PGUID 12 f f t f i 2 16 "19 25"  nameicnlike - _null_ ));
 DESCR("does not match LIKE expression, case-insensitive");
 DATA(insert OID = 1637 (  like_escape		PGNSP PGUID 12 f f t f i 2 25 "25 25" like_escape - _null_ ));
-DESCR("convert match pattern to use backslash escapes");
+DESCR("convert LIKE pattern to use backslash escapes");

 DATA(insert OID = 1689 (  update_pg_pwd_and_pg_group  PGNSP PGUID 12 f f t f v 0 2279  ""	update_pg_pwd_and_pg_group - _null_ ));
 DESCR("update pg_pwd and pg_group files");
@ -2784,7 +2787,7 @@ DESCR("matches LIKE expression");
 DATA(insert OID = 2008 (  notlike		   PGNSP PGUID 12 f f t f i 2 16 "17 17"	byteanlike - _null_ ));
 DESCR("does not match LIKE expression");
 DATA(insert OID = 2009 (  like_escape	   PGNSP PGUID 12 f f t f i 2 17 "17 17" like_escape_bytea - _null_ ));
-DESCR("convert match pattern to use backslash escapes");
+DESCR("convert LIKE pattern to use backslash escapes");
 DATA(insert OID = 2010 (  length		   PGNSP PGUID 12 f f t f i 1 23 "17"  byteaoctetlen - _null_ ));
 DESCR("octet length");
 DATA(insert OID = 2011 (  byteacat		   PGNSP PGUID 12 f f t f i 2 17 "17 17"	byteacat - _null_ ));
@ -2889,9 +2892,9 @@ DATA(insert OID = 2072 (  date_mi_interval	PGNSP PGUID 14 f f t f i 2 1114 "1082
 DESCR("subtract");

 DATA(insert OID = 2073 (  substring			PGNSP PGUID 12 f f t f i 2 25 "25 25"	textregexsubstr - _null_ ));
-DESCR("substitutes regular expression");
-DATA(insert OID = 2074 (  substring			PGNSP PGUID 14 f f t f i 3 25 "25 25 25"	"select substring($1, like_escape($2, $3))" - _null_ ));
-DESCR("substitutes regular expression with escape argument");
+DESCR("extracts text matching regular expression");
+DATA(insert OID = 2074 (  substring			PGNSP PGUID 14 f f t f i 3 25 "25 25 25"	"select substring($1, similar_escape($2, $3))" - _null_ ));
+DESCR("extracts text matching SQL99 regular expression");

 DATA(insert OID = 2075 (  bit				PGNSP PGUID 12 f f t f i 1 1560 "20"	bitfromint8 - _null_ ));
 DESCR("int8 to bitstring");
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $Id: builtins.h,v 1.201 2002/09/19 22:48:34 tgl Exp $
+ * $Id: builtins.h,v 1.202 2002/09/22 17:27:25 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -371,6 +371,7 @@ extern Datum nameicregexne(PG_FUNCTION_ARGS);
 extern Datum texticregexeq(PG_FUNCTION_ARGS);
 extern Datum texticregexne(PG_FUNCTION_ARGS);
 extern Datum textregexsubstr(PG_FUNCTION_ARGS);
+extern Datum similar_escape(PG_FUNCTION_ARGS);

 /* regproc.c */
 extern Datum regprocin(PG_FUNCTION_ARGS);
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@ -142,15 +142,15 @@ SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS "456";
 t
 (1 row)

-- T581 regular expression substring
-SELECT SUBSTRING('abcdefg' FROM '(b|f).*(d)' FOR '#') AS "bcd";
+-- T581 regular expression substring (with SQL99's bizarre regexp syntax)
+SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd";
 bcd 
 -----
 bcd
 (1 row)

 -- No match should return NULL
-SELECT SUBSTRING('abcdefg' FROM '(1|2|3)' FOR '#') IS NULL AS "True";
+SELECT SUBSTRING('abcdefg' FROM '#"(b_d)#"%' FOR '#') IS NULL AS "True";
 True 
 ------
 t
@ -175,8 +175,16 @@ SELECT SUBSTRING('abcdefg' FROM NULL FOR '#') IS NULL AS "True";
 t
 (1 row)

-- PostgreSQL extention to allow omitting the escape character
-SELECT SUBSTRING('abcdefg' FROM '(c|d).e') AS "cde";
+-- PostgreSQL extension to allow omitting the escape character;
+-- here the regexp is taken as Posix syntax
+SELECT SUBSTRING('abcdefg' FROM 'c.e') AS "cde";
+ cde 
+-----
+ cde
+(1 row)
+
+-- With a parenthesized subexpression, return only what matches the subexpr
+SELECT SUBSTRING('abcdefg' FROM 'b(.*)f') AS "cde";
 cde 
 -----
 cde
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@ -62,19 +62,24 @@ SELECT SUBSTRING('1234567890' FROM 3) = '34567890' AS "34567890";

 SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS "456";

-- T581 regular expression substring
-SELECT SUBSTRING('abcdefg' FROM '(b|f).*(d)' FOR '#') AS "bcd";
+-- T581 regular expression substring (with SQL99's bizarre regexp syntax)
+SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd";

 -- No match should return NULL
-SELECT SUBSTRING('abcdefg' FROM '(1|2|3)' FOR '#') IS NULL AS "True";
+SELECT SUBSTRING('abcdefg' FROM '#"(b_d)#"%' FOR '#') IS NULL AS "True";

 -- Null inputs should return NULL
 SELECT SUBSTRING('abcdefg' FROM '(b|c)' FOR NULL) IS NULL AS "True";
 SELECT SUBSTRING(NULL FROM '(b|c)' FOR '#') IS NULL AS "True";
 SELECT SUBSTRING('abcdefg' FROM NULL FOR '#') IS NULL AS "True";

-- PostgreSQL extention to allow omitting the escape character
-SELECT SUBSTRING('abcdefg' FROM '(c|d).e') AS "cde";
+-- PostgreSQL extension to allow omitting the escape character;
+-- here the regexp is taken as Posix syntax
+SELECT SUBSTRING('abcdefg' FROM 'c.e') AS "cde";
+
+-- With a parenthesized subexpression, return only what matches the subexpr
+SELECT SUBSTRING('abcdefg' FROM 'b(.*)f') AS "cde";
+

 -- E021-11 position expression
 SELECT POSITION('4' IN '1234567890') = '4' AS "4";