gcc/gcc/ada/g-regexp.adb
Robert Dewar 9de61fcb9b a-secain.adb, [...]: Minor reformatting
2005-11-14  Robert Dewar  <dewar@adacore.com>

	* a-secain.adb, a-slcain.adb, a-shcain.adb, a-chtgke.ads, a-chtgke.adb,
	a-stwiha.adb, a-strhas.adb, a-chzla1.ads, a-chzla9.ads, a-stzbou.adb,
	a-stzbou.ads, a-stzfix.adb, a-stzhas.adb, a-stzmap.adb, a-stzmap.ads,
	a-stzsea.adb, a-stzsea.ads, a-stzsup.adb, a-stzsup.ads, a-stzunb.adb,
	a-stzunb.ads, a-szuzha.adb, a-szuzha.ads, a-szuzti.adb, a-szuzti.ads,
	a-ztcoau.adb, a-ztcoau.ads, a-ztcoio.adb, a-ztcstr.adb, a-ztcstr.ads,
	a-ztdeau.adb, a-ztdeau.ads, a-ztdeio.adb, a-ztdeio.ads, a-ztedit.adb,
	a-ztedit.ads, a-ztenau.ads, a-ztenio.adb, a-ztenio.ads, a-ztexio.adb,
	a-ztexio.ads, a-ztfiio.adb, a-ztfiio.ads, a-ztflau.adb, a-ztflau.ads,
	a-ztflio.adb, a-ztflio.ads, a-ztgeau.adb, a-ztgeau.ads, a-ztinau.adb,
	a-ztinau.ads, a-ztinio.adb, a-ztmoau.ads, a-ztmoio.adb, a-ztmoio.ads,
	a-zttest.adb, g-enblsp-vms-alpha.adb, g-enblsp-vms-alpha.adb,
	g-enblsp-vms-ia64.adb, g-enblsp-vms-ia64.adb, system-linux-hppa.ads,
	a-chacon.adb, a-chacon.ads, a-wichun.adb, a-wichun.ads, a-zchuni.adb,
	a-zchuni.ads, g-trasym-vms-alpha.adb, g-trasym-vms-ia64.adb,
	system-hpux-ia64.ads, g-soccon-unixware.ads, g-soliop-unixware.ads,
	g-soccon-interix.ads, g-soliop-solaris.ads, g-eacodu-vms.adb,
	g-expect-vms.adb, g-socthi-vms.adb, g-soliop-mingw.ads,
	a-intnam-unixware.ads, a-intnam-lynxos.ads, a-intnam-tru64.ads,
	a-intnam-aix.ads, a-intnam-linux.ads, a-intnam-linux.ads,
	a-intnam-dummy.ads, a-numaux-libc-x86.ads, a-intnam-interix.ads,
	a-intnam-solaris.ads, a-calend-vms.adb, a-calend-vms.ads,
	a-intnam-vms.ads, a-calend-mingw.adb, a-intnam-mingw.ads,
	a-intnam-vxworks.ads, a-numaux-vxworks.ads, system-unixware.ads,
	system-linux-ia64.ads, a-intnam-freebsd.ads, system-freebsd-x86.ads,
	system-lynxos-ppc.ads, system-linux-x86_64.ads, a-stunha.adb,
	a-cgaaso.ads, a-cgaaso.adb, a-chtgop.adb, a-cgcaso.adb, a-cgarso.adb,
	a-cohata.ads, a-crbtgk.adb, a-crbltr.ads, a-coprnu.adb, a-rbtgso.adb,
	a-intnam-darwin.ads, system-darwin-ppc.ads, gprmake.adb, makegpr.ads,
	system-tru64.ads, system-aix.ads, system-solaris-x86.ads,
	system-irix-o32.ads, s-interr-sigaction.adb, system-irix-n32.ads,
	s-parame-mingw.adb, system-hpux.ads, s-traceb-hpux.adb,
	system-linux-x86.ads, s-inmaop-dummy.adb, system-os2.ads,
	system-interix.ads, system-solaris-sparc.ads,
	system-solaris-sparcv9.ads, s-inmaop-vms.adb,
	s-mastop-vms.adb, expander.adb, expander.ads, s-gloloc-mingw.adb,
	system-mingw.ads, system-vms-zcx.ads, s-osinte-fsu.adb,
	s-traceb-mastop.adb, a-exextr.adb, a-exstat.adb, a-filico.ads,
	a-finali.ads, a-interr.ads, a-intsig.adb, a-intsig.ads,
	a-except.ads, a-numaux-x86.ads, a-astaco.adb, a-calend.adb,
	a-calend.ads, a-chahan.adb, a-chahan.ads, a-chlat9.ads,
	a-colien.adb, a-colien.ads, a-colire.adb, a-colire.ads,
	a-comlin.adb, a-comlin.ads, a-cwila1.ads, a-cwila9.ads,
	a-elchha.adb, a-decima.adb, a-decima.ads, a-diocst.ads,
	a-direio.adb, a-direio.ads, a-excach.adb, a-excach.adb,
	a-exctra.ads, ali-util.adb, a-ngcefu.adb, a-ngcoty.adb,
	a-ngcoty.ads, a-nudira.adb, a-nudira.ads, a-nuflra.adb,
	a-numaux.ads, a-reatim.ads, a-sequio.adb, a-sequio.ads,
	a-siocst.ads, a-ssicst.ads, a-stmaco.ads, a-storio.adb,
	a-strbou.adb, a-strbou.ads, a-stream.ads, a-strfix.adb,
	a-strmap.adb, a-strmap.ads, a-strsea.adb, a-strsea.ads,
	a-strsup.adb, a-strsup.ads, a-strunb.adb, a-strunb.ads,
	a-stwibo.adb, a-stwibo.ads, a-stwifi.adb, a-stwima.adb,
	a-stwima.ads, a-stwise.adb, a-stwise.ads, a-stwisu.adb,
	a-stwisu.ads, a-stwiun.adb, a-stwiun.ads, a-suteio.adb,
	a-suteio.ads, a-swmwco.ads, a-swuwti.adb, a-swuwti.ads,
	a-sytaco.adb, a-sytaco.ads, a-tasatt.adb, a-taside.adb,
	a-taside.ads, a-teioed.adb, a-textio.adb, a-textio.ads,
	a-ticoau.adb, a-ticoau.ads, a-ticoio.adb, a-tideau.adb,
	a-tideio.adb, a-tienau.adb, a-tienio.adb, a-tifiio.adb,
	a-tiflio.adb, a-tigeau.adb, a-tigeau.ads, a-tiinau.adb,
	a-tiinio.adb, a-timoau.adb, a-timoio.adb, a-timoio.ads,
	a-tiocst.ads, a-titest.adb, atree.adb, a-witeio.adb,
	a-witeio.ads, a-wtcoau.adb, a-wtcoau.ads, a-wtcoio.adb,
	a-wtcstr.ads, a-wtdeau.adb, a-wtdeio.adb, a-wtedit.adb,
	a-wtedit.ads, a-wtenau.adb, a-wtenio.adb, a-wtfiio.adb,
	a-wtflio.adb, a-wtgeau.adb, a-wtinau.adb, a-wtinio.adb,
	a-wtmoau.adb, a-wtmoio.adb, a-wttest.adb, back_end.adb,
	bindgen.adb, butil.adb, butil.ads, checks.ads, cio.c, comperr.adb,
	csets.ads, cstand.adb, debug.ads, elists.ads, errno.c, errout.adb,
	errout.ads, erroutc.adb, erroutc.ads, errutil.adb, errutil.ads,
	errutil.ads, err_vars.ads, eval_fat.adb, exp_ch11.adb, exp_ch11.ads,
	exp_ch2.adb, exp_ch7.ads, exp_imgv.ads, exp_pakd.adb, exp_prag.adb,
	exp_prag.ads, exp_tss.adb, exp_tss.ads, exp_vfpt.ads, fe.h, fmap.adb,
	freeze.ads, frontend.adb, frontend.ads, g-arrspl.adb, g-arrspl.ads,
	g-awk.adb, g-awk.ads, g-boumai.ads, g-calend.adb, g-calend.ads,
	g-catiio.adb, g-comlin.adb, g-comlin.ads, g-comlin.ads, g-comlin.ads,
	g-comver.adb, g-crc32.adb, g-crc32.ads, g-ctrl_c.ads, g-curexc.ads,
	g-debpoo.ads, g-debuti.adb, g-diopit.adb, g-diopit.ads, g-dirope.adb,
	g-dirope.ads, g-dyntab.adb, g-dyntab.ads, g-excact.adb, g-excact.ads,
	g-except.ads, g-exctra.adb, g-exctra.ads, g-expect.ads, g-flocon.ads,
	g-hesorg.ads, g-io.adb, g-locfil.ads, g-md5.adb, g-md5.ads, g-md5.ads,
	g-moreex.adb, g-signal.ads, g-signal.adb, gnatbind.ads, gnatchop.adb,
	gnatcmd.adb, gnatfind.adb, gnatlbr.adb, gnatmake.ads, gnatmem.adb,
	gnatprep.adb, gnatprep.ads, gnatsym.adb, gnatxref.adb, g-os_lib.adb,
	g-os_lib.ads, g-pehage.adb, g-pehage.ads, gprep.ads, g-regexp.adb,
	g-regexp.ads, g-regist.adb, g-regist.ads, g-regpat.ads, g-semaph.adb,
	g-socthi.adb, g-soliop.ads, g-spipat.adb, g-spipat.ads, g-sptabo.ads,
	g-sptain.ads, g-sptavs.ads, g-string.ads, g-tasloc.adb, g-tasloc.ads,
	g-trasym.adb, g-trasym.ads, i-fortra.adb, i-fortra.ads, inline.adb,
	layout.adb, live.adb, make.ads, makeutl.ads, makeutl.adb, mdll-fil.adb,
	mdll-fil.ads, mdll-utl.ads, memroot.ads, memtrack.adb, mlib.ads,
	mlib-fil.adb, mlib-fil.ads, mlib-prj.ads, mlib-utl.adb, mlib-utl.ads,
	nlists.adb, nlists.ads, osint.adb, osint.ads, osint-c.adb, osint-l.adb,
	osint-l.ads, osint-m.ads, output.adb, par.adb, par.adb, par.ads,
	par-ch11.adb, par-ch12.adb, par-ch2.adb, par-ch4.adb, par-ch5.adb,
	par-ch6.adb, par-ch9.adb, par-endh.adb, par-labl.adb, par-load.adb,
	par-tchk.adb, prep.adb, prepcomp.adb, prepcomp.ads, prj-attr.ads,
	prj-com.ads, prj-dect.adb, prj-dect.ads, prj-env.ads, prj-err.ads,
	prj-ext.ads, prj-makr.adb, prj-makr.ads, prj-nmsc.ads, prj-pars.adb,
	prj-pars.ads, prj-part.ads, prj-pp.ads, prj-proc.ads, prj-strt.adb,
	prj-strt.ads, prj-tree.adb, prj-util.adb, prj-util.ads, rtsfind.adb,
	sem.adb, sem.ads, sem_case.adb, sem_case.ads, sem_ch11.adb,
	sem_ch4.adb, sem_ch6.ads, sem_ch7.ads, sem_dist.ads, sem_elab.ads,
	sem_elim.ads, sem_eval.ads, sem_intr.ads, sem_maps.adb, sem_maps.ads,
	sem_maps.ads, sem_res.ads, sem_type.ads, sfn_scan.adb, sfn_scan.ads,
	s-imgwch.ads, s-imgwiu.adb, s-imgwiu.ads, s-inmaop.ads, sinput.adb,
	sinput.ads, s-pack03.adb, s-pack03.ads, s-pack05.adb, s-pack05.ads,
	s-pack06.adb, s-pack06.ads, s-pack07.adb, s-pack07.ads, s-pack09.adb,
	s-pack09.ads, s-pack10.adb, s-pack10.ads, s-pack11.adb, s-pack11.ads,
	s-pack12.adb, s-pack12.ads, s-pack13.adb, s-pack13.ads, s-pack14.adb,
	s-pack14.ads, s-pack15.adb, s-pack15.ads, s-pack17.adb, s-pack17.ads,
	s-pack18.adb, s-pack18.ads, s-pack19.adb, s-pack19.ads, s-pack20.adb,
	s-pack20.ads, s-pack21.adb, s-pack21.ads, s-pack22.adb, s-pack22.ads,
	s-pack23.adb, s-pack23.ads, s-pack24.adb, s-pack24.ads, s-pack25.adb,
	s-pack25.ads, s-pack26.adb, s-pack26.ads, s-pack27.adb, s-pack27.ads,
	s-pack28.adb, s-pack28.ads, s-pack29.adb, s-pack29.ads, s-pack30.adb,
	s-pack30.ads, s-pack31.adb, s-pack31.ads, s-pack33.adb, s-pack33.ads,
	s-pack34.adb, s-pack34.ads, s-pack35.adb, s-pack35.ads, s-pack36.adb,
	s-pack36.ads, s-pack37.adb, s-pack37.ads, s-pack38.adb, s-pack38.ads,
	s-pack39.adb, s-pack39.ads, s-pack40.adb, s-pack40.ads, s-pack41.adb,
	s-pack41.ads, s-pack42.adb, s-pack42.ads, s-pack43.adb, s-pack43.ads,
	s-pack44.adb, s-pack44.ads, s-pack45.adb, s-pack45.ads, s-pack46.adb,
	s-pack46.ads, s-pack47.adb, s-pack47.ads, s-pack48.adb, s-pack48.ads,
	s-pack49.adb, s-pack49.ads, s-pack50.adb, s-pack50.ads, s-pack51.adb,
	s-pack51.ads, s-pack52.adb, s-pack52.ads, s-pack53.adb, s-pack53.ads,
	s-pack54.adb, s-pack54.ads, s-pack55.adb, s-pack55.ads, s-pack56.adb,
	s-pack56.ads, s-pack57.adb, s-pack57.ads, s-pack58.adb, s-pack58.ads,
	s-pack59.adb, s-pack59.ads, s-pack60.adb, s-pack60.adb, s-pack60.ads,
	s-pack61.adb, s-pack61.ads, s-pack62.adb, s-pack62.ads, s-pack63.adb,
	s-pack63.ads, s-parint.adb, s-parint.adb, s-parint.ads, sprint.ads,
	s-purexc.ads, s-restri.ads, s-restri.adb, s-scaval.adb, s-scaval.ads,
	s-secsta.adb, s-secsta.ads, s-sequio.adb, s-sequio.ads, stand.ads,
	s-tasuti.adb, s-traceb.adb, s-traceb.ads, stringt.adb, stringt.ads,
	styleg.ads, s-valboo.adb, s-valboo.ads, s-valcha.adb, s-valcha.ads,
	s-valdec.adb, s-valdec.ads, s-valint.adb, s-valint.ads, s-valint.ads,
	s-vallld.adb, s-vallld.ads, s-vallli.adb, s-vallli.ads, s-valllu.adb,
	s-valllu.ads, s-valrea.adb, s-valrea.ads, s-valuns.adb, s-valuns.ads,
	s-valuti.adb, s-valuti.ads, s-valwch.ads, s-veboop.adb, s-veboop.ads,
	s-vercon.adb, s-vercon.ads, s-wchcnv.adb, s-wchcnv.ads, s-wchcon.ads,
	s-wchjis.adb, s-wchjis.ads, s-wchstw.adb, s-wchstw.adb, s-wchstw.ads,
	s-wchwts.adb, s-wchwts.ads, s-widboo.adb, s-widboo.ads, s-widcha.adb,
	s-widcha.ads, s-widenu.adb, s-widenu.ads, s-widlli.adb, s-widlli.ads,
	s-widllu.adb, s-widllu.ads, s-widwch.adb, s-widwch.ads, s-wwdcha.adb,
	s-wwdcha.ads, s-wwdenu.adb, s-wwdenu.ads, symbols.adb, symbols.ads,
	table.ads, targparm.adb, targparm.ads, tb-alvms.c, tb-alvxw.c,
	tbuild.adb, tree_io.ads, treepr.adb, treeprs.adt, ttypef.ads,
	ttypes.ads, types.adb, uintp.adb, uintp.ads, uname.ads, urealp.ads,
	usage.ads, validsw.ads, vxaddr2line.adb, widechar.adb, widechar.ads,
	xeinfo.adb, xnmake.adb, xref_lib.ads, xr_tabls.adb, xr_tabls.ads,
	xsinfo.adb, xtreeprs.adb, xsnames.adb, vms_conv.ads, vms_conv.adb,
	a-dirval.ads, a-dirval.adb, a-dirval-mingw.adb, a-direct.ads,
	a-direct.adb, indepsw.ads, prj-attr-pm.ads, system-linux-ppc.ads,
	a-numaux-darwin.ads, a-numaux-darwin.adb,
	a-swuwha.ads, a-stunha.ads: Minor reformatting

From-SVN: r107016
2005-11-15 15:06:45 +01:00

1388 lines
46 KiB
Ada

------------------------------------------------------------------------------
-- --
-- GNAT COMPILER COMPONENTS --
-- --
-- G N A T . R E G E X P --
-- --
-- B o d y --
-- --
-- Copyright (C) 1999-2005, AdaCore --
-- --
-- GNAT is free software; you can redistribute it and/or modify it under --
-- terms of the GNU General Public License as published by the Free Soft- --
-- ware Foundation; either version 2, or (at your option) any later ver- --
-- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
-- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
-- for more details. You should have received a copy of the GNU General --
-- Public License distributed with GNAT; see file COPYING. If not, write --
-- to the Free Software Foundation, 51 Franklin Street, Fifth Floor, --
-- Boston, MA 02110-1301, USA. --
-- --
-- As a special exception, if other files instantiate generics from this --
-- unit, or you link this unit with other files to produce an executable, --
-- this unit does not by itself cause the resulting executable to be --
-- covered by the GNU General Public License. This exception does not --
-- however invalidate any other reasons why the executable file might be --
-- covered by the GNU Public License. --
-- --
-- GNAT was originally developed by the GNAT team at New York University. --
-- Extensive contributions were provided by Ada Core Technologies Inc. --
-- --
------------------------------------------------------------------------------
with Unchecked_Deallocation;
with Ada.Exceptions;
with GNAT.Case_Util;
package body GNAT.Regexp is
-- Named constants for the grouping and character-set delimiters that the
-- pattern parsers below compare against.
Open_Paren : constant Character := '(';
Close_Paren : constant Character := ')';
Open_Bracket : constant Character := '[';
Close_Bracket : constant Character := ']';
-- Index of a state (first dimension of a Regexp_Array). The value 0 is
-- used in this unit to mean "no state": tables are initialized to 0 and
-- Get returns 0 for an index outside the table.
type State_Index is new Natural;
-- Index of a column (second dimension of a Regexp_Array). Columns up to
-- Alphabet_Size are selected through a Mapping; the primary tables built
-- in Compile use the columns above Alphabet_Size for empty-character
-- transitions.
type Column_Index is new Natural;
type Regexp_Array is array
(State_Index range <>, Column_Index range <>) of State_Index;
-- First index is for the state number
-- Second index is for the character type
-- Contents is the new State
type Regexp_Array_Access is access Regexp_Array;
-- Use this type through the subprograms Set and Get below, so that it
-- can grow dynamically depending on the needs.
type Mapping is array (Character'Range) of Column_Index;
-- Mapping between characters and column in the Regexp_Array. A value of 0
-- means that the character has not been given a column of its own.
type Boolean_Array is array (State_Index range <>) of Boolean;
-- One flag per state; used below for the Is_Final component
type Regexp_Value
(Alphabet_Size : Column_Index;
Num_States : State_Index) is
record
Map : Mapping;
States : Regexp_Array (1 .. Num_States, 0 .. Alphabet_Size);
Is_Final : Boolean_Array (1 .. Num_States);
Case_Sensitive : Boolean;
end record;
-- Deterministic finite-state machine. The discriminants fix the bounds of
-- the transition table States and of Is_Final; Map gives the column of
-- States to use for each character.
-----------------------
-- Local Subprograms --
-----------------------
procedure Set
(Table : in out Regexp_Array_Access;
State : State_Index;
Column : Column_Index;
Value : State_Index);
-- Sets a value in the table. If the table is too small, reallocate it
-- dynamically so that (State, Column) is a valid index in it.
function Get
(Table : Regexp_Array_Access;
State : State_Index;
Column : Column_Index)
return State_Index;
-- Returns the value in the table at (State, Column).
-- If this index does not exist in the table, returns 0
procedure Free is new Unchecked_Deallocation
(Regexp_Array, Regexp_Array_Access);
-- Deallocation of a table designated by a Regexp_Array_Access
------------
-- Adjust --
------------
procedure Adjust (R : in out Regexp) is
   --  Replaces R.R by a newly allocated Regexp_Value holding a copy of the
   --  value R.R designated on entry, so that R no longer shares that
   --  storage with any other object.
   --  NOTE(review): presumably this is the Ada.Finalization Adjust
   --  operation of type Regexp; the type declaration is not visible from
   --  this body - confirm against the package spec.

   Tmp : Regexp_Access;

begin
   --  When R.R is null there is no automaton to duplicate. Without this
   --  guard the discriminant reads below dereference a null access and
   --  raise Constraint_Error.

   if R.R /= null then
      Tmp := new Regexp_Value (Alphabet_Size => R.R.Alphabet_Size,
                               Num_States    => R.R.Num_States);
      Tmp.all := R.R.all;
      R.R := Tmp;
   end if;
end Adjust;
-------------
-- Compile --
-------------
function Compile
(Pattern : String;
Glob : Boolean := False;
Case_Sensitive : Boolean := True)
return Regexp
is
S : String := Pattern;
-- The pattern which is really compiled (when the pattern is case
-- insensitive, this string is converted to lower case)
Map : Mapping := (others => 0);
-- Mapping between characters and columns in the tables
Alphabet_Size : Column_Index := 0;
-- Number of significant characters in the regular expression.
-- This total does not include special operators, such as *, (, ...
procedure Create_Mapping;
-- Creates a mapping between characters in the regexp and columns
-- in the tables representing the regexp. Tests that the regexp is
-- well-formed. Modifies Alphabet_Size and Map.
procedure Create_Primary_Table
(Table : out Regexp_Array_Access;
Num_States : out State_Index;
Start_State : out State_Index;
End_State : out State_Index);
-- Creates the first version of the regexp (this is a non-deterministic
-- finite state machine, which is unsuited to a fast pattern
-- matching algorithm). We use a recursive algorithm to process the
-- parenthesized sub-expressions.
--
-- Table : at the end of the procedure : Column 0 is for any character
-- ('.') and the last columns are for no character (closure)
-- Num_States is set to the number of states in the table
-- Start_State is the number of the starting state in the regexp
-- End_State is the number of the final state when the regexp matches
procedure Create_Primary_Table_Glob
(Table : out Regexp_Array_Access;
Num_States : out State_Index;
Start_State : out State_Index;
End_State : out State_Index);
-- Same function as above, but it deals with the second possible
-- grammar for 'globbing pattern', which is a kind of subset of the
-- whole regular expression grammar.
function Create_Secondary_Table
(First_Table : Regexp_Array_Access;
Num_States : State_Index;
Start_State : State_Index;
End_State : State_Index)
return Regexp;
-- Creates the definitive table representing the regular expression
-- This is actually a transformation of the primary table First_Table,
-- where every state is grouped with the states in its 'no-character'
-- columns. The transitions between the new states are then recalculated
-- and if necessary some new states are created.
--
-- Note that the resulting finite-state machine is not optimized in
-- terms of the number of states : it would be more time-consuming to
-- add a third pass to reduce the number of states in the machine, with
-- no speed improvement...
procedure Raise_Exception
(M : String;
Index : Integer);
pragma No_Return (Raise_Exception);
-- Raise an exception, indicating an error at character Index in S
--------------------
-- Create_Mapping --
--------------------
procedure Create_Mapping is
   --  Scans the pattern S once. Every literal character met is given a
   --  column of its own through Map / Alphabet_Size, and the structural
   --  checks below call Raise_Exception when the pattern is malformed.
   --  Which characters are operators depends on Glob.

   J : Integer := S'First;
   --  Index in S of the character being examined

   Parenthesis_Level : Integer := 0;
   --  Number of '(' not yet closed (only counted when Glob is False)

   Curly_Level : Integer := 0;
   --  Number of '{' not yet closed (only counted when Glob is True)

   procedure Add_In_Map (C : Character);
   --  Add a character in the mapping, if it is not already defined

   procedure Check_In_Pattern;
   --  Call Raise_Exception if J has moved past the last character of S.
   --  Must be called before reading S (J) whenever J has been advanced
   --  inside a construct that requires more characters.

   ----------------
   -- Add_In_Map --
   ----------------

   procedure Add_In_Map (C : Character) is
   begin
      if Map (C) = 0 then
         Alphabet_Size := Alphabet_Size + 1;
         Map (C) := Alphabet_Size;
      end if;
   end Add_In_Map;

   ----------------------
   -- Check_In_Pattern --
   ----------------------

   procedure Check_In_Pattern is
   begin
      if J > S'Last then
         Raise_Exception ("Ran out of characters while parsing ", J);
      end if;
   end Check_In_Pattern;

--  Start of processing for Create_Mapping

begin
   while J <= S'Last loop
      case S (J) is
         when Open_Bracket =>
            J := J + 1;
            Check_In_Pattern;

            if S (J) = '^' then
               J := J + 1;
               Check_In_Pattern;
            end if;

            --  The first character never has a special meaning: a ']' or
            --  '-' found here is an ordinary member of the set. It must
            --  get a column like any other member, otherwise the table
            --  builders would store its transition in column 0.

            if S (J) = ']' or else S (J) = '-' then
               Add_In_Map (S (J));
               J := J + 1;
            end if;

            loop
               Check_In_Pattern;

               exit when S (J) = Close_Bracket;

               --  A '-' denotes a range unless it is the last character
               --  before the closing bracket. The test on S'Last keeps
               --  S (J + 1) within bounds for a truncated pattern.

               if S (J) = '-'
                 and then J < S'Last
                 and then S (J + 1) /= Close_Bracket
               then
                  declare
                     Start : constant Integer := J - 1;

                  begin
                     J := J + 1;

                     if S (J) = '\' then
                        J := J + 1;
                        Check_In_Pattern;
                     end if;

                     for Char in S (Start) .. S (J) loop
                        Add_In_Map (Char);
                     end loop;
                  end;

               else
                  if S (J) = '\' then
                     J := J + 1;
                     Check_In_Pattern;
                  end if;

                  Add_In_Map (S (J));
               end if;

               J := J + 1;
            end loop;

         --  A close bracket must follow an open bracket, and cannot be
         --  found alone on the line.

         when Close_Bracket =>
            Raise_Exception
              ("Incorrect character ']' in regular expression", J);

         when '\' =>
            if J < S'Last then
               J := J + 1;
               Add_In_Map (S (J));

            else
               --  \ not allowed at the end of the regexp

               Raise_Exception
                 ("Incorrect character '\' in regular expression", J);
            end if;

         when Open_Paren =>
            if not Glob then
               Parenthesis_Level := Parenthesis_Level + 1;
            else
               Add_In_Map (Open_Paren);
            end if;

         when Close_Paren =>
            if not Glob then
               Parenthesis_Level := Parenthesis_Level - 1;

               if Parenthesis_Level < 0 then
                  Raise_Exception
                    ("')' is not associated with '(' in regular "
                     & "expression", J);
               end if;

               --  J > S'First here, since a '(' has been seen before

               if S (J - 1) = Open_Paren then
                  Raise_Exception
                    ("Empty parenthesis not allowed in regular "
                     & "expression", J);
               end if;

            else
               Add_In_Map (Close_Paren);
            end if;

         when '.' =>
            if Glob then
               Add_In_Map ('.');
            end if;

         when '{' =>
            if not Glob then
               Add_In_Map (S (J));
            else
               Curly_Level := Curly_Level + 1;
            end if;

         when '}' =>
            if not Glob then
               Add_In_Map (S (J));
            else
               Curly_Level := Curly_Level - 1;

               --  Same rule as for parentheses: a closing brace seen
               --  before its opening brace is an error even if the
               --  counts balance at the end of the pattern.

               if Curly_Level < 0 then
                  Raise_Exception
                    ("'}' is not associated with '{' in regular "
                     & "expression", J);
               end if;
            end if;

         when '*' | '?' | '|' | '+' =>
            if not Glob then

               --  These operators must apply to a sub-expression, and
               --  cannot be found at the beginning of the line.

               if J = S'First then
                  Raise_Exception
                    ("'*', '+', '?' and '|' operators cannot be in "
                     & "first position in regular expression", J);
               end if;

            elsif S (J) = '|' or else S (J) = '+' then

               --  In a globbing pattern '|' and '+' are plain characters;
               --  '*' and '?' are wildcards and need no column.

               Add_In_Map (S (J));
            end if;

         when others =>
            Add_In_Map (S (J));
      end case;

      J := J + 1;
   end loop;

   --  A closing parenthesis must follow an open parenthesis

   if Parenthesis_Level /= 0 then
      Raise_Exception
        ("'(' must always be associated with a ')'", J);
   end if;

   if Curly_Level /= 0 then
      Raise_Exception
        ("'{' must always be associated with a '}'", J);
   end if;
end Create_Mapping;
--------------------------
-- Create_Primary_Table --
--------------------------
procedure Create_Primary_Table
(Table : out Regexp_Array_Access;
Num_States : out State_Index;
Start_State : out State_Index;
End_State : out State_Index)
is
Empty_Char : constant Column_Index := Alphabet_Size + 1;
Current_State : State_Index := 0;
-- Index of the last created state
procedure Add_Empty_Char
(State : State_Index;
To_State : State_Index);
-- Add a empty-character transition from State to To_State
procedure Create_Repetition
(Repetition : Character;
Start_Prev : State_Index;
End_Prev : State_Index;
New_Start : out State_Index;
New_End : in out State_Index);
-- Create the table in case we have a '*', '+' or '?'.
-- Start_Prev .. End_Prev should indicate respectively the start and
-- end index of the previous expression, to which '*', '+' or '?' is
-- applied.
procedure Create_Simple
(Start_Index : Integer;
End_Index : Integer;
Start_State : out State_Index;
End_State : out State_Index);
-- Fill the table for the regexp Simple.
-- This is the recursive procedure called to handle () expressions
-- If End_State = 0, then the call to Create_Simple creates an
-- independent regexp, not a concatenation
-- Start_Index .. End_Index is the starting index in the string S.
--
-- Warning: it may look like we are creating too many empty-string
-- transitions, but they are needed to get the correct regexp.
-- The table is filled as follow ( s means start-state, e means
-- end-state) :
--
-- regexp state_num | a b * empty_string
-- ------- ------------------------------
-- a 1 (s) | 2 - - -
-- 2 (e) | - - - -
--
-- ab 1 (s) | 2 - - -
-- 2 | - - - 3
-- 3 | - 4 - -
-- 4 (e) | - - - -
--
-- a|b 1 | 2 - - -
-- 2 | - - - 6
-- 3 | - 4 - -
-- 4 | - - - 6
-- 5 (s) | - - - 1,3
-- 6 (e) | - - - -
--
-- a* 1 | 2 - - -
-- 2 | - - - 4
-- 3 (s) | - - - 1,4
-- 4 (e) | - - - 3
--
-- (a) 1 (s) | 2 - - -
-- 2 (e) | - - - -
--
-- a+ 1 | 2 - - -
-- 2 | - - - 4
-- 3 (s) | - - - 1
-- 4 (e) | - - - 3
--
-- a? 1 | 2 - - -
-- 2 | - - - 4
-- 3 (s) | - - - 1,4
-- 4 (e) | - - - -
--
-- . 1 (s) | 2 2 2 -
-- 2 (e) | - - - -
function Next_Sub_Expression
(Start_Index : Integer;
End_Index : Integer)
return Integer;
-- Returns the index of the last character of the next sub-expression
-- in Simple. Index cannot be greater than End_Index.
--------------------
-- Add_Empty_Char --
--------------------
procedure Add_Empty_Char
  (State    : State_Index;
   To_State : State_Index)
is
   Free_Column : Column_Index := Empty_Char;
   --  Candidate column, starting at the first empty-character column

begin
   --  Step over the empty-character columns of State that already hold a
   --  transition (Get yields 0 for an unused or non-existent entry).

   loop
      exit when Get (Table, State, Free_Column) = 0;
      Free_Column := Free_Column + 1;
   end loop;

   --  Record the new transition in the first unused column

   Set (Table, State, Free_Column, To_State);
end Add_Empty_Char;
-----------------------
-- Create_Repetition --
-----------------------
procedure Create_Repetition
  (Repetition : Character;
   Start_Prev : State_Index;
   End_Prev   : State_Index;
   New_Start  : out State_Index;
   New_End    : in out State_Index)
is
   Entry_State : constant State_Index := Current_State + 1;
   Exit_State  : constant State_Index := Current_State + 2;
   --  The two states added around the repeated sub-expression

begin
   New_Start := Entry_State;

   --  On entry New_End is the end state of what precedes the repeated
   --  item (0 if nothing precedes it); chain it to the new entry state.

   if New_End /= 0 then
      Add_Empty_Char (New_End, Entry_State);
   end if;

   Current_State := Exit_State;
   New_End       := Exit_State;

   --  Wrap the previous sub-expression between the two new states

   Add_Empty_Char (End_Prev, Exit_State);
   Add_Empty_Char (Entry_State, Start_Prev);

   --  Anything but '+' may match the empty string: allow skipping

   case Repetition is
      when '+' =>
         null;
      when others =>
         Add_Empty_Char (Entry_State, Exit_State);
   end case;

   --  Anything but '?' may match several times: allow looping back

   case Repetition is
      when '?' =>
         null;
      when others =>
         Add_Empty_Char (Exit_State, Entry_State);
   end case;
end Create_Repetition;
-------------------
-- Create_Simple --
-------------------
procedure Create_Simple
  (Start_Index : Integer;
   End_Index   : Integer;
   Start_State : out State_Index;
   End_State   : out State_Index)
is
   Pos : Integer := Start_Index;
   --  Index in S of the character currently being translated

   Last_Start : State_Index := 0;
   --  Start state of the most recently built item

   procedure Attach
     (Item_Start : State_Index;
      Item_End   : State_Index);
   --  Link the item spanning Item_Start .. Item_End to what has been
   --  built so far. If the character following Pos is '*', '+' or '?',
   --  it is consumed and the item is wrapped by Create_Repetition;
   --  otherwise the item is simply concatenated after End_State.

   ------------
   -- Attach --
   ------------

   procedure Attach
     (Item_Start : State_Index;
      Item_End   : State_Index)
   is
   begin
      if Pos < End_Index
        and then (S (Pos + 1) = '*'
                    or else S (Pos + 1) = '+'
                    or else S (Pos + 1) = '?')
      then
         Pos := Pos + 1;
         Create_Repetition
           (Repetition => S (Pos),
            Start_Prev => Item_Start,
            End_Prev   => Item_End,
            New_Start  => Last_Start,
            New_End    => End_State);

      else
         Last_Start := Item_Start;

         if End_State /= 0 then
            Add_Empty_Char (End_State, Last_Start);
         end if;

         End_State := Item_End;
      end if;
   end Attach;

--  Start of processing for Create_Simple

begin
   Start_State := 0;
   End_State   := 0;

   while Pos <= End_Index loop
      case S (Pos) is

         --  Parenthesized group: translate its contents recursively

         when Open_Paren =>
            declare
               Inner_First : constant Integer := Pos + 1;
               Group_Start : State_Index;
               Group_End   : State_Index;

            begin
               Pos := Next_Sub_Expression (Pos, End_Index);
               Create_Simple
                 (Inner_First, Pos - 1, Group_Start, Group_End);
               Attach (Group_Start, Group_End);
            end;

         --  Alternative: what has been built so far is the left operand

         when '|' =>
            declare
               Left_Start  : constant State_Index := Start_State;
               Left_End    : constant State_Index := End_State;
               Right_First : constant Integer := Pos + 1;
               Right_Start : State_Index := 0;
               Right_End   : State_Index := 0;

            begin
               Pos := Next_Sub_Expression (Pos, End_Index);

               --  New common start state for both operands

               Current_State := Current_State + 1;
               Last_Start    := Current_State;
               Start_State   := Last_Start;

               --  Translate the right operand

               Create_Simple (Right_First, Pos, Right_Start, Right_End);

               --  Branch from the common start, then join both operands
               --  on a new common end state.

               Add_Empty_Char (Last_Start, Right_Start);
               Add_Empty_Char (Last_Start, Left_Start);
               Current_State := Current_State + 1;
               End_State     := Current_State;
               Add_Empty_Char (Left_End, End_State);
               Add_Empty_Char (Right_End, End_State);
            end;

         --  Character set

         when Open_Bracket =>
            Current_State := Current_State + 1;

            declare
               Target : State_Index := Current_State + 1;
               --  State reached on a listed character; becomes 0 for a
               --  negated set, where listed characters are the ones
               --  that must not match.

            begin
               Pos := Pos + 1;

               if S (Pos) = '^' then
                  Pos    := Pos + 1;
                  Target := 0;

                  for Column in 0 .. Alphabet_Size loop
                     Set (Table, Current_State, Column,
                          Value => Current_State + 1);
                  end loop;
               end if;

               --  A leading '-' or ']' is an ordinary member of the set

               if S (Pos) = '-' or else S (Pos) = ']' then
                  Set (Table, Current_State, Map (S (Pos)),
                       Value => Target);
                  Pos := Pos + 1;
               end if;

               --  Remaining members, up to the closing bracket

               while S (Pos) /= Close_Bracket loop
                  if S (Pos) = '-'
                    and then S (Pos + 1) /= ']'
                  then
                     declare
                        Low : constant Integer := Pos - 1;

                     begin
                        Pos := Pos + 1;

                        if S (Pos) = '\' then
                           Pos := Pos + 1;
                        end if;

                        for Char in S (Low) .. S (Pos) loop
                           Set (Table, Current_State, Map (Char),
                                Value => Target);
                        end loop;
                     end;

                  else
                     if S (Pos) = '\' then
                        Pos := Pos + 1;
                     end if;

                     Set (Table, Current_State, Map (S (Pos)),
                          Value => Target);
                  end if;

                  Pos := Pos + 1;
               end loop;
            end;

            Current_State := Current_State + 1;
            Attach (Current_State - 1, Current_State);

         --  Operators and closing delimiters cannot start an item

         when '*' | '+' | '?' | Close_Paren | Close_Bracket =>
            Raise_Exception
              ("Incorrect character in regular expression :", Pos);

         --  Single character, possibly escaped, or '.'

         when others =>
            Current_State := Current_State + 1;

            if S (Pos) = '.' then
               for K in 0 .. Alphabet_Size loop
                  Set (Table, Current_State, K,
                       Value => Current_State + 1);
               end loop;

            else
               if S (Pos) = '\' then
                  Pos := Pos + 1;
               end if;

               Set (Table, Current_State, Map (S (Pos)),
                    Value => Current_State + 1);
            end if;

            Current_State := Current_State + 1;
            Attach (Current_State - 1, Current_State);
      end case;

      --  The first item built provides the start state of the whole

      if Start_State = 0 then
         Start_State := Last_Start;
      end if;

      Pos := Pos + 1;
   end loop;
end Create_Simple;
-------------------------
-- Next_Sub_Expression --
-------------------------
function Next_Sub_Expression
  (Start_Index : Integer;
   End_Index   : Integer)
   return Integer
is
   Pos : Integer := Start_Index;
   --  Scanning position in S

   Started_On_Bar : constant Boolean := S (Start_Index) = '|';
   --  True when the scan starts on an alternative operator, in which
   --  case the sub-expression also stops just before the next '|'.

begin
   while Pos /= End_Index loop
      Pos := Pos + 1;

      case S (Pos) is

         --  Escaped character: step over it

         when '\' =>
            Pos := Pos + 1;

         --  Character set: step over it, honouring escapes inside

         when Open_Bracket =>
            loop
               Pos := Pos + 1;
               exit when S (Pos) = Close_Bracket;

               if S (Pos) = '\' then
                  Pos := Pos + 1;
               end if;
            end loop;

         --  Nested group: jump to its closing parenthesis

         when Open_Paren =>
            Pos := Next_Sub_Expression (Pos, End_Index);

         when Close_Paren =>
            return Pos;

         when '|' =>
            if Started_On_Bar then
               return Pos - 1;
            end if;

         when others =>
            null;
      end case;
   end loop;

   return Pos;
end Next_Sub_Expression;
-- Start of Create_Primary_Table
begin
Table.all := (others => (others => 0));
Create_Simple (S'First, S'Last, Start_State, End_State);
Num_States := Current_State;
end Create_Primary_Table;
-------------------------------
-- Create_Primary_Table_Glob --
-------------------------------
procedure Create_Primary_Table_Glob
(Table : out Regexp_Array_Access;
Num_States : out State_Index;
Start_State : out State_Index;
End_State : out State_Index)
is
Empty_Char : constant Column_Index := Alphabet_Size + 1;
Current_State : State_Index := 0;
-- Index of the last created state
procedure Add_Empty_Char
(State : State_Index;
To_State : State_Index);
-- Add a empty-character transition from State to To_State
procedure Create_Simple
(Start_Index : Integer;
End_Index : Integer;
Start_State : out State_Index;
End_State : out State_Index);
-- Fill the table for the S (Start_Index .. End_Index).
-- This is the recursive procedure called to handle () expressions
--------------------
-- Add_Empty_Char --
--------------------
procedure Add_Empty_Char
  (State    : State_Index;
   To_State : State_Index)
is
   Slot : Column_Index := Empty_Char;
   --  Column examined, starting at the first empty-character column

begin
   --  Look for the first empty-character column of State that holds no
   --  transition yet (Get yields 0 for such an entry).

   loop
      exit when Get (Table, State, Slot) = 0;
      Slot := Slot + 1;
   end loop;

   Set (Table  => Table,
        State  => State,
        Column => Slot,
        Value  => To_State);
end Add_Empty_Char;
-------------------
-- Create_Simple --
-------------------
procedure Create_Simple
  (Start_Index : Integer;
   End_Index   : Integer;
   Start_State : out State_Index;
   End_State   : out State_Index)
is
   --  Translates the globbing pattern S (Start_Index .. End_Index) into
   --  states of Table, each item being chained to the previous one with
   --  an empty-character transition. On return Start_State and End_State
   --  are the first and last state of the slice, or 0 if it is empty.
   --  Called recursively for each alternative of a {..,..} group.

   J : Integer := Start_Index;
   --  Index in S of the character being translated

   Last_Start : State_Index := 0;
   --  Start state of the most recently built item

begin
   Start_State := 0;
   End_State   := 0;

   while J <= End_Index loop
      case S (J) is

         --  Character set

         when Open_Bracket =>
            Current_State := Current_State + 1;

            declare
               Next_State : State_Index := Current_State + 1;
               --  State reached on a listed character; 0 for a negated
               --  set, where listed characters must not match.

            begin
               J := J + 1;

               if S (J) = '^' then
                  J := J + 1;
                  Next_State := 0;

                  for Column in 0 .. Alphabet_Size loop
                     Set (Table, Current_State, Column,
                          Value => Current_State + 1);
                  end loop;
               end if;

               --  Automatically add the first character. It is an
               --  ordinary member of the set, so it must lead to
               --  Next_State like every other member. Leading to
               --  Current_State would make the set loop on itself
               --  and, for a negated set, accept the character.

               if S (J) = '-' or else S (J) = ']' then
                  Set (Table, Current_State, Map (S (J)),
                       Value => Next_State);
                  J := J + 1;
               end if;

               --  Loop till closing bracket found

               loop
                  exit when S (J) = Close_Bracket;

                  if S (J) = '-'
                    and then S (J + 1) /= ']'
                  then
                     declare
                        Start : constant Integer := J - 1;

                     begin
                        J := J + 1;

                        if S (J) = '\' then
                           J := J + 1;
                        end if;

                        for Char in S (Start) .. S (J) loop
                           Set (Table, Current_State, Map (Char),
                                Value => Next_State);
                        end loop;
                     end;

                  else
                     if S (J) = '\' then
                        J := J + 1;
                     end if;

                     Set (Table, Current_State, Map (S (J)),
                          Value => Next_State);
                  end if;

                  J := J + 1;
               end loop;
            end;

            Last_Start := Current_State;
            Current_State := Current_State + 1;

            if End_State /= 0 then
               Add_Empty_Char (End_State, Last_Start);
            end if;

            End_State := Current_State;

         --  Group of comma-separated alternatives

         when '{' =>
            declare
               End_Sub          : Integer;
               Start_Regexp_Sub : State_Index;
               End_Regexp_Sub   : State_Index;
               Create_Start     : State_Index := 0;

               Create_End : State_Index := 0;
               --  Initialized to avoid junk warning

            begin
               while S (J) /= '}' loop

                  --  First step : find sub pattern

                  End_Sub := J + 1;
                  while S (End_Sub) /= ','
                    and then S (End_Sub) /= '}'
                  loop
                     End_Sub := End_Sub + 1;
                  end loop;

                  --  Second step : create a sub pattern

                  Create_Simple
                    (J + 1,
                     End_Sub - 1,
                     Start_Regexp_Sub,
                     End_Regexp_Sub);

                  J := End_Sub;

                  --  Third step : create an alternative

                  if Create_Start = 0 then

                     --  First alternative: create the common start and
                     --  end states of the group.

                     Current_State := Current_State + 1;
                     Create_Start := Current_State;
                     Add_Empty_Char (Create_Start, Start_Regexp_Sub);
                     Current_State := Current_State + 1;
                     Create_End := Current_State;
                     Add_Empty_Char (End_Regexp_Sub, Create_End);

                  else
                     --  Later alternative: a new start state branches to
                     --  the previous start and to this alternative.

                     Current_State := Current_State + 1;
                     Add_Empty_Char (Current_State, Create_Start);
                     Create_Start := Current_State;
                     Add_Empty_Char (Create_Start, Start_Regexp_Sub);
                     Add_Empty_Char (End_Regexp_Sub, Create_End);
                  end if;
               end loop;

               if End_State /= 0 then
                  Add_Empty_Char (End_State, Create_Start);
               end if;

               End_State := Create_End;
               Last_Start := Create_Start;
            end;

         --  Any sequence of characters, possibly empty

         when '*' =>
            Current_State := Current_State + 1;

            if End_State /= 0 then
               Add_Empty_Char (End_State, Current_State);
            end if;

            Add_Empty_Char (Current_State, Current_State + 1);
            Add_Empty_Char (Current_State, Current_State + 3);
            Last_Start := Current_State;

            Current_State := Current_State + 1;

            for K in 0 .. Alphabet_Size loop
               Set (Table, Current_State, K,
                    Value => Current_State + 1);
            end loop;

            Current_State := Current_State + 1;
            Add_Empty_Char (Current_State, Current_State + 1);

            Current_State := Current_State + 1;
            Add_Empty_Char (Current_State, Last_Start);
            End_State := Current_State;

         --  '?' (any single character) or an ordinary character

         when others =>
            Current_State := Current_State + 1;

            if S (J) = '?' then
               for K in 0 .. Alphabet_Size loop
                  Set (Table, Current_State, K,
                       Value => Current_State + 1);
               end loop;

            else
               if S (J) = '\' then
                  J := J + 1;
               end if;

               --  Create the state for the symbol S (J)

               Set (Table, Current_State, Map (S (J)),
                    Value => Current_State + 1);
            end if;

            Last_Start := Current_State;
            Current_State := Current_State + 1;

            if End_State /= 0 then
               Add_Empty_Char (End_State, Last_Start);
            end if;

            End_State := Current_State;
      end case;

      --  The first item built provides the start state of the slice

      if Start_State = 0 then
         Start_State := Last_Start;
      end if;

      J := J + 1;
   end loop;
end Create_Simple;
-- Start of processing for Create_Primary_Table_Glob
begin
Table.all := (others => (others => 0));
Create_Simple (S'First, S'Last, Start_State, End_State);
Num_States := Current_State;
end Create_Primary_Table_Glob;
----------------------------
-- Create_Secondary_Table --
----------------------------
function Create_Secondary_Table
(First_Table : Regexp_Array_Access;
Num_States : State_Index;
Start_State : State_Index;
End_State : State_Index) return Regexp
-- Build the Regexp value returned to the caller from the primary table
-- First_Table. Every state of the result (a "meta-state") is a set of
-- primary-table states, stored as a Boolean membership array. The first
-- meta-state is the closure of Start_State, and a meta-state is final
-- when it contains End_State. Num_States is not referenced in the body.
--
-- NOTE(review): Meta_States has Last_Index + 1 entries while Table and
-- Is_Final have Last_Index entries, so the number of meta-states that
-- can be created is bounded by the size of First_Table. It looks like a
-- pattern needing more meta-states would fail an index check - confirm.
is
-- Num_States is deliberately unused, silence the compiler warning
pragma Warnings (Off, Num_States);
Last_Index : constant State_Index := First_Table'Last (1);
-- Membership set: component K tells whether primary state K belongs to
-- the meta-state.
type Meta_State is array (1 .. Last_Index) of Boolean;
-- Transition table of the result, indexed by meta-state and by column
-- 0 .. Alphabet_Size. An entry of 0 means no transition was recorded.
Table : Regexp_Array (1 .. Last_Index, 0 .. Alphabet_Size) :=
(others => (others => 0));
-- Meta-states 1 .. Nb_State are the ones created so far. The entry at
-- Nb_State + 1 is used as scratch space for the candidate being built.
Meta_States : array (1 .. Last_Index + 1) of Meta_State :=
(others => (others => False));
-- Set when the candidate meta-state received at least one transition
Temp_State_Not_Null : Boolean;
Is_Final : Boolean_Array (1 .. Last_Index) := (others => False);
-- Meta-state whose outgoing transitions are currently being computed
Current_State : State_Index := 1;
-- Number of meta-states created so far
Nb_State : State_Index := 1;
procedure Closure
(State : in out Meta_State;
Item : State_Index);
-- Compute the closure of the state (that is every other state which
-- has an empty-character transition) and add it to the state
-------------
-- Closure --
-------------
procedure Closure
(State : in out Meta_State;
Item : State_Index)
is
begin
-- Item was already added, so its successors have been handled too.
-- This also stops the recursion when the transitions form a cycle.
if State (Item) then
return;
end if;
State (Item) := True;
-- The columns above Alphabet_Size are the ones scanned for
-- empty-character transitions. The scan of a row stops at the first
-- entry that is 0.
for Column in Alphabet_Size + 1 .. First_Table'Last (2) loop
if First_Table (Item, Column) = 0 then
return;
end if;
Closure (State, First_Table (Item, Column));
end loop;
end Closure;
-- Start of processing for Create_Secondary_Table
begin
-- Meta-state 1 is the closure of the primary start state
Closure (Meta_States (Current_State), Start_State);
-- Nb_State may grow inside the loop, so newly created meta-states are
-- processed in turn until none is left.
while Current_State <= Nb_State loop
-- If this new meta-state includes the primary table end state,
-- then this meta-state will be a final state in the regexp
if Meta_States (Current_State)(End_State) then
Is_Final (Current_State) := True;
end if;
-- For every character in the regexp, calculate the possible
-- transitions from Current_State
for Column in 0 .. Alphabet_Size loop
-- Reset the scratch entry before building the next candidate
Meta_States (Nb_State + 1) := (others => False);
Temp_State_Not_Null := False;
-- Collect, with their closure, the targets of every member of the
-- current meta-state that has a transition on this column.
for K in Meta_States (Current_State)'Range loop
if Meta_States (Current_State)(K)
and then First_Table (K, Column) /= 0
then
Closure
(Meta_States (Nb_State + 1), First_Table (K, Column));
Temp_State_Not_Null := True;
end if;
end loop;
-- If at least one transition existed
if Temp_State_Not_Null then
-- Check if this new state corresponds to an old one
for K in 1 .. Nb_State loop
if Meta_States (K) = Meta_States (Nb_State + 1) then
Table (Current_State, Column) := K;
exit;
end if;
end loop;
-- If not, create a new state. The entry is still 0 here only
-- when the search above found no match. Incrementing Nb_State
-- turns the scratch entry into the new meta-state.
if Table (Current_State, Column) = 0 then
Nb_State := Nb_State + 1;
Table (Current_State, Column) := Nb_State;
end if;
end if;
end loop;
Current_State := Current_State + 1;
end loop;
-- Returns the regexp
declare
R : Regexp_Access;
begin
-- Allocate a value sized for the meta-states actually created and
-- copy the mapping, the final-state flags and the transitions into it.
R := new Regexp_Value (Alphabet_Size => Alphabet_Size,
Num_States => Nb_State);
R.Map := Map;
R.Is_Final := Is_Final (1 .. Nb_State);
R.Case_Sensitive := Case_Sensitive;
for State in 1 .. Nb_State loop
for K in 0 .. Alphabet_Size loop
R.States (State, K) := Table (State, K);
end loop;
end loop;
return (Ada.Finalization.Controlled with R => R);
end;
end Create_Secondary_Table;
---------------------
-- Raise_Exception --
---------------------
procedure Raise_Exception
  (M     : String;
   Index : Integer)
is
   --  Raise Error_In_Regexp with a message made of M followed by the
   --  offset Index at which the problem was found. This procedure never
   --  returns normally.

begin
   --  The Img attribute already yields a leading blank for non-negative
   --  values, so no blank is written after "offset". Adding one would put
   --  two consecutive spaces in the message.

   Ada.Exceptions.Raise_Exception
     (Error_In_Regexp'Identity, M & " at offset" & Index'Img);
end Raise_Exception;
-- Start of processing for Compile
begin
-- Special case for the empty string: it always matches, and the
-- following processing would fail on it.
if S = "" then
return (Ada.Finalization.Controlled with
R => new Regexp_Value'
(Alphabet_Size => 0,
Num_States => 1,
Map => (others => 0),
States => (others => (others => 1)),
Is_Final => (others => True),
Case_Sensitive => True));
end if;
if not Case_Sensitive then
GNAT.Case_Util.To_Lower (S);
end if;
Create_Mapping;
-- Creates the primary table
declare
Table : Regexp_Array_Access;
Num_States : State_Index;
Start_State : State_Index;
End_State : State_Index;
R : Regexp;
begin
Table := new Regexp_Array (1 .. 100,
0 .. Alphabet_Size + 10);
if not Glob then
Create_Primary_Table (Table, Num_States, Start_State, End_State);
else
Create_Primary_Table_Glob
(Table, Num_States, Start_State, End_State);
end if;
-- Creates the secondary table
R := Create_Secondary_Table
(Table, Num_States, Start_State, End_State);
Free (Table);
return R;
end;
end Compile;
--------------
-- Finalize --
--------------
procedure Finalize (R : in out Regexp) is

   procedure Deallocate_Value is new
     Unchecked_Deallocation (Regexp_Value, Regexp_Access);
   --  Instance used to reclaim the Regexp_Value designated by R.R

begin
   --  Release the storage attached to this regular expression

   Deallocate_Value (R.R);
end Finalize;
---------
-- Get --
---------
function Get
  (Table  : Regexp_Array_Access;
   State  : State_Index;
   Column : Column_Index) return State_Index
is
   --  Return the entry of Table at (State, Column), or 0 when either index
   --  is beyond the corresponding upper bound of Table.

begin
   --  Indices past the upper bounds read as 0

   if State > Table'Last (1)
     or else Column > Table'Last (2)
   then
      return 0;
   end if;

   return Table (State, Column);
end Get;
-----------
-- Match --
-----------
function Match (S : String; R : Regexp) return Boolean is
   --  Run S through the transition table of R, starting in state 1, and
   --  return True if the state reached after the last character is final.
   --  Constraint_Error is raised if R holds no regexp value.

   Cur    : State_Index := 1;
   Symbol : Character;

begin
   if R.R = null then
      raise Constraint_Error;
   end if;

   for Pos in S'Range loop
      Symbol := S (Pos);

      --  Fold the character to lower case first when R is not case
      --  sensitive.

      if not R.R.Case_Sensitive then
         Symbol := GNAT.Case_Util.To_Lower (Symbol);
      end if;

      Cur := R.R.States (Cur, R.R.Map (Symbol));

      --  State 0 means no transition exists, so S cannot match

      if Cur = 0 then
         return False;
      end if;
   end loop;

   return R.R.Is_Final (Cur);
end Match;
---------
-- Set --
---------
procedure Set
  (Table  : in out Regexp_Array_Access;
   State  : State_Index;
   Column : Column_Index;
   Value  : State_Index)
is
   --  Store Value at (State, Column) in Table. When either index is beyond
   --  the upper bounds of Table, a larger table is allocated, the existing
   --  contents are copied into it, the old table is freed and Table is
   --  updated to designate the new one.

   Row_Limit : State_Index;
   Col_Limit : Column_Index;
   Grown     : Regexp_Array_Access;

begin
   --  Common case: the cell already exists

   if State <= Table'Last (1)
     and then Column <= Table'Last (2)
   then
      Table (State, Column) := Value;
      return;
   end if;

   --  Each upper bound becomes the next multiple of its current value that
   --  lies above the requested index, so (State, Column) is valid in the
   --  new table.

   Row_Limit := Table'Last (1) * (State / Table'Last (1) + 1);
   Col_Limit := Table'Last (2) * (Column / Table'Last (2) + 1);

   Grown := new Regexp_Array (Table'First (1) .. Row_Limit,
                              Table'First (2) .. Col_Limit);
   Grown.all := (others => (others => 0));

   --  Copy the old contents, the added cells keep the value 0

   for Row in Table'Range (1) loop
      for Col in Table'Range (2) loop
         Grown (Row, Col) := Table (Row, Col);
      end loop;
   end loop;

   Free (Table);
   Table := Grown;
   Table (State, Column) := Value;
end Set;
end GNAT.Regexp;