mirror of
git://gcc.gnu.org/git/gcc.git
synced 2024-12-20 14:59:40 +08:00
9de61fcb9b
2005-11-14 Robert Dewar <dewar@adacore.com> * a-secain.adb, a-slcain.adb, a-shcain.adb, a-chtgke.ads, a-chtgke.adb, a-stwiha.adb, a-strhas.adb, a-chzla1.ads, a-chzla9.ads, a-stzbou.adb, a-stzbou.ads, a-stzfix.adb, a-stzhas.adb, a-stzmap.adb, a-stzmap.ads, a-stzsea.adb, a-stzsea.ads, a-stzsup.adb, a-stzsup.ads, a-stzunb.adb, a-stzunb.ads, a-szuzha.adb, a-szuzha.ads, a-szuzti.adb, a-szuzti.ads, a-ztcoau.adb, a-ztcoau.ads, a-ztcoio.adb, a-ztcstr.adb, a-ztcstr.ads, a-ztdeau.adb, a-ztdeau.ads, a-ztdeio.adb, a-ztdeio.ads, a-ztedit.adb, a-ztedit.ads, a-ztenau.ads, a-ztenio.adb, a-ztenio.ads, a-ztexio.adb, a-ztexio.ads, a-ztfiio.adb, a-ztfiio.ads, a-ztflau.adb, a-ztflau.ads, a-ztflio.adb, a-ztflio.ads, a-ztgeau.adb, a-ztgeau.ads, a-ztinau.adb, a-ztinau.ads, a-ztinio.adb, a-ztmoau.ads, a-ztmoio.adb, a-ztmoio.ads, a-zttest.adb, g-enblsp-vms-alpha.adb, g-enblsp-vms-alpha.adb, g-enblsp-vms-ia64.adb, g-enblsp-vms-ia64.adb, system-linux-hppa.ads, a-chacon.adb, a-chacon.ads, a-wichun.adb, a-wichun.ads, a-zchuni.adb, a-zchuni.ads, g-trasym-vms-alpha.adb, g-trasym-vms-ia64.adb, system-hpux-ia64.ads, g-soccon-unixware.ads, g-soliop-unixware.ads, g-soccon-interix.ads, g-soliop-solaris.ads, g-eacodu-vms.adb, g-expect-vms.adb, g-socthi-vms.adb, g-soliop-mingw.ads, a-intnam-unixware.ads, a-intnam-lynxos.ads, a-intnam-tru64.ads, a-intnam-aix.ads, a-intnam-linux.ads, a-intnam-linux.ads, a-intnam-dummy.ads, a-numaux-libc-x86.ads, a-intnam-interix.ads, a-intnam-solaris.ads, a-calend-vms.adb, a-calend-vms.ads, a-intnam-vms.ads, a-calend-mingw.adb, a-intnam-mingw.ads, a-intnam-vxworks.ads, a-numaux-vxworks.ads, system-unixware.ads, system-linux-ia64.ads, a-intnam-freebsd.ads, system-freebsd-x86.ads, system-lynxos-ppc.ads, system-linux-x86_64.ads, a-stunha.adb, a-cgaaso.ads, a-cgaaso.adb, a-chtgop.adb, a-cgcaso.adb, a-cgarso.adb, a-cohata.ads, a-crbtgk.adb, a-crbltr.ads, a-coprnu.adb, a-rbtgso.adb, a-intnam-darwin.ads, system-darwin-ppc.ads, gprmake.adb, makegpr.ads, system-tru64.ads, 
system-aix.ads, system-solaris-x86.ads, system-irix-o32.ads, s-interr-sigaction.adb, system-irix-n32.ads, s-parame-mingw.adb, system-hpux.ads, s-traceb-hpux.adb, system-linux-x86.ads, s-inmaop-dummy.adb, system-os2.ads, system-interix.ads, system-solaris-sparc.ads, system-solaris-sparcv9.ads, s-inmaop-vms.adb, s-mastop-vms.adb, expander.adb, expander.ads, s-gloloc-mingw.adb, system-mingw.ads, system-vms-zcx.ads, s-osinte-fsu.adb, s-traceb-mastop.adb, a-exextr.adb, a-exstat.adb, a-filico.ads, a-finali.ads, a-interr.ads, a-intsig.adb, a-intsig.ads, a-except.ads, a-numaux-x86.ads, a-astaco.adb, a-calend.adb, a-calend.ads, a-chahan.adb, a-chahan.ads, a-chlat9.ads, a-colien.adb, a-colien.ads, a-colire.adb, a-colire.ads, a-comlin.adb, a-comlin.ads, a-cwila1.ads, a-cwila9.ads, a-elchha.adb, a-decima.adb, a-decima.ads, a-diocst.ads, a-direio.adb, a-direio.ads, a-excach.adb, a-excach.adb, a-exctra.ads, ali-util.adb, a-ngcefu.adb, a-ngcoty.adb, a-ngcoty.ads, a-nudira.adb, a-nudira.ads, a-nuflra.adb, a-numaux.ads, a-reatim.ads, a-sequio.adb, a-sequio.ads, a-siocst.ads, a-ssicst.ads, a-stmaco.ads, a-storio.adb, a-strbou.adb, a-strbou.ads, a-stream.ads, a-strfix.adb, a-strmap.adb, a-strmap.ads, a-strsea.adb, a-strsea.ads, a-strsup.adb, a-strsup.ads, a-strunb.adb, a-strunb.ads, a-stwibo.adb, a-stwibo.ads, a-stwifi.adb, a-stwima.adb, a-stwima.ads, a-stwise.adb, a-stwise.ads, a-stwisu.adb, a-stwisu.ads, a-stwiun.adb, a-stwiun.ads, a-suteio.adb, a-suteio.ads, a-swmwco.ads, a-swuwti.adb, a-swuwti.ads, a-sytaco.adb, a-sytaco.ads, a-tasatt.adb, a-taside.adb, a-taside.ads, a-teioed.adb, a-textio.adb, a-textio.ads, a-ticoau.adb, a-ticoau.ads, a-ticoio.adb, a-tideau.adb, a-tideio.adb, a-tienau.adb, a-tienio.adb, a-tifiio.adb, a-tiflio.adb, a-tigeau.adb, a-tigeau.ads, a-tiinau.adb, a-tiinio.adb, a-timoau.adb, a-timoio.adb, a-timoio.ads, a-tiocst.ads, a-titest.adb, atree.adb, a-witeio.adb, a-witeio.ads, a-wtcoau.adb, a-wtcoau.ads, a-wtcoio.adb, a-wtcstr.ads, a-wtdeau.adb, a-wtdeio.adb, 
a-wtedit.adb, a-wtedit.ads, a-wtenau.adb, a-wtenio.adb, a-wtfiio.adb, a-wtflio.adb, a-wtgeau.adb, a-wtinau.adb, a-wtinio.adb, a-wtmoau.adb, a-wtmoio.adb, a-wttest.adb, back_end.adb, bindgen.adb, butil.adb, butil.ads, checks.ads, cio.c, comperr.adb, csets.ads, cstand.adb, debug.ads, elists.ads, errno.c, errout.adb, errout.ads, erroutc.adb, erroutc.ads, errutil.adb, errutil.ads, errutil.ads, err_vars.ads, eval_fat.adb, exp_ch11.adb, exp_ch11.ads, exp_ch2.adb, exp_ch7.ads, exp_imgv.ads, exp_pakd.adb, exp_prag.adb, exp_prag.ads, exp_tss.adb, exp_tss.ads, exp_vfpt.ads, fe.h, fmap.adb, freeze.ads, frontend.adb, frontend.ads, g-arrspl.adb, g-arrspl.ads, g-awk.adb, g-awk.ads, g-boumai.ads, g-calend.adb, g-calend.ads, g-catiio.adb, g-comlin.adb, g-comlin.ads, g-comlin.ads, g-comlin.ads, g-comver.adb, g-crc32.adb, g-crc32.ads, g-ctrl_c.ads, g-curexc.ads, g-debpoo.ads, g-debuti.adb, g-diopit.adb, g-diopit.ads, g-dirope.adb, g-dirope.ads, g-dyntab.adb, g-dyntab.ads, g-excact.adb, g-excact.ads, g-except.ads, g-exctra.adb, g-exctra.ads, g-expect.ads, g-flocon.ads, g-hesorg.ads, g-io.adb, g-locfil.ads, g-md5.adb, g-md5.ads, g-md5.ads, g-moreex.adb, g-signal.ads, g-signal.adb, gnatbind.ads, gnatchop.adb, gnatcmd.adb, gnatfind.adb, gnatlbr.adb, gnatmake.ads, gnatmem.adb, gnatprep.adb, gnatprep.ads, gnatsym.adb, gnatxref.adb, g-os_lib.adb, g-os_lib.ads, g-pehage.adb, g-pehage.ads, gprep.ads, g-regexp.adb, g-regexp.ads, g-regist.adb, g-regist.ads, g-regpat.ads, g-semaph.adb, g-socthi.adb, g-soliop.ads, g-spipat.adb, g-spipat.ads, g-sptabo.ads, g-sptain.ads, g-sptavs.ads, g-string.ads, g-tasloc.adb, g-tasloc.ads, g-trasym.adb, g-trasym.ads, i-fortra.adb, i-fortra.ads, inline.adb, layout.adb, live.adb, make.ads, makeutl.ads, makeutl.adb, mdll-fil.adb, mdll-fil.ads, mdll-utl.ads, memroot.ads, memtrack.adb, mlib.ads, mlib-fil.adb, mlib-fil.ads, mlib-prj.ads, mlib-utl.adb, mlib-utl.ads, nlists.adb, nlists.ads, osint.adb, osint.ads, osint-c.adb, osint-l.adb, osint-l.ads, osint-m.ads, 
output.adb, par.adb, par.adb, par.ads, par-ch11.adb, par-ch12.adb, par-ch2.adb, par-ch4.adb, par-ch5.adb, par-ch6.adb, par-ch9.adb, par-endh.adb, par-labl.adb, par-load.adb, par-tchk.adb, prep.adb, prepcomp.adb, prepcomp.ads, prj-attr.ads, prj-com.ads, prj-dect.adb, prj-dect.ads, prj-env.ads, prj-err.ads, prj-ext.ads, prj-makr.adb, prj-makr.ads, prj-nmsc.ads, prj-pars.adb, prj-pars.ads, prj-part.ads, prj-pp.ads, prj-proc.ads, prj-strt.adb, prj-strt.ads, prj-tree.adb, prj-util.adb, prj-util.ads, rtsfind.adb, sem.adb, sem.ads, sem_case.adb, sem_case.ads, sem_ch11.adb, sem_ch4.adb, sem_ch6.ads, sem_ch7.ads, sem_dist.ads, sem_elab.ads, sem_elim.ads, sem_eval.ads, sem_intr.ads, sem_maps.adb, sem_maps.ads, sem_maps.ads, sem_res.ads, sem_type.ads, sfn_scan.adb, sfn_scan.ads, s-imgwch.ads, s-imgwiu.adb, s-imgwiu.ads, s-inmaop.ads, sinput.adb, sinput.ads, s-pack03.adb, s-pack03.ads, s-pack05.adb, s-pack05.ads, s-pack06.adb, s-pack06.ads, s-pack07.adb, s-pack07.ads, s-pack09.adb, s-pack09.ads, s-pack10.adb, s-pack10.ads, s-pack11.adb, s-pack11.ads, s-pack12.adb, s-pack12.ads, s-pack13.adb, s-pack13.ads, s-pack14.adb, s-pack14.ads, s-pack15.adb, s-pack15.ads, s-pack17.adb, s-pack17.ads, s-pack18.adb, s-pack18.ads, s-pack19.adb, s-pack19.ads, s-pack20.adb, s-pack20.ads, s-pack21.adb, s-pack21.ads, s-pack22.adb, s-pack22.ads, s-pack23.adb, s-pack23.ads, s-pack24.adb, s-pack24.ads, s-pack25.adb, s-pack25.ads, s-pack26.adb, s-pack26.ads, s-pack27.adb, s-pack27.ads, s-pack28.adb, s-pack28.ads, s-pack29.adb, s-pack29.ads, s-pack30.adb, s-pack30.ads, s-pack31.adb, s-pack31.ads, s-pack33.adb, s-pack33.ads, s-pack34.adb, s-pack34.ads, s-pack35.adb, s-pack35.ads, s-pack36.adb, s-pack36.ads, s-pack37.adb, s-pack37.ads, s-pack38.adb, s-pack38.ads, s-pack39.adb, s-pack39.ads, s-pack40.adb, s-pack40.ads, s-pack41.adb, s-pack41.ads, s-pack42.adb, s-pack42.ads, s-pack43.adb, s-pack43.ads, s-pack44.adb, s-pack44.ads, s-pack45.adb, s-pack45.ads, s-pack46.adb, s-pack46.ads, s-pack47.adb, 
s-pack47.ads, s-pack48.adb, s-pack48.ads, s-pack49.adb, s-pack49.ads, s-pack50.adb, s-pack50.ads, s-pack51.adb, s-pack51.ads, s-pack52.adb, s-pack52.ads, s-pack53.adb, s-pack53.ads, s-pack54.adb, s-pack54.ads, s-pack55.adb, s-pack55.ads, s-pack56.adb, s-pack56.ads, s-pack57.adb, s-pack57.ads, s-pack58.adb, s-pack58.ads, s-pack59.adb, s-pack59.ads, s-pack60.adb, s-pack60.adb, s-pack60.ads, s-pack61.adb, s-pack61.ads, s-pack62.adb, s-pack62.ads, s-pack63.adb, s-pack63.ads, s-parint.adb, s-parint.adb, s-parint.ads, sprint.ads, s-purexc.ads, s-restri.ads, s-restri.adb, s-scaval.adb, s-scaval.ads, s-secsta.adb, s-secsta.ads, s-sequio.adb, s-sequio.ads, stand.ads, s-tasuti.adb, s-traceb.adb, s-traceb.ads, stringt.adb, stringt.ads, styleg.ads, s-valboo.adb, s-valboo.ads, s-valcha.adb, s-valcha.ads, s-valdec.adb, s-valdec.ads, s-valint.adb, s-valint.ads, s-valint.ads, s-vallld.adb, s-vallld.ads, s-vallli.adb, s-vallli.ads, s-valllu.adb, s-valllu.ads, s-valrea.adb, s-valrea.ads, s-valuns.adb, s-valuns.ads, s-valuti.adb, s-valuti.ads, s-valwch.ads, s-veboop.adb, s-veboop.ads, s-vercon.adb, s-vercon.ads, s-wchcnv.adb, s-wchcnv.ads, s-wchcon.ads, s-wchjis.adb, s-wchjis.ads, s-wchstw.adb, s-wchstw.adb, s-wchstw.ads, s-wchwts.adb, s-wchwts.ads, s-widboo.adb, s-widboo.ads, s-widcha.adb, s-widcha.ads, s-widenu.adb, s-widenu.ads, s-widlli.adb, s-widlli.ads, s-widllu.adb, s-widllu.ads, s-widwch.adb, s-widwch.ads, s-wwdcha.adb, s-wwdcha.ads, s-wwdenu.adb, s-wwdenu.ads, symbols.adb, symbols.ads, table.ads, targparm.adb, targparm.ads, tb-alvms.c, tb-alvxw.c, tbuild.adb, tree_io.ads, treepr.adb, treeprs.adt, ttypef.ads, ttypes.ads, types.adb, uintp.adb, uintp.ads, uname.ads, urealp.ads, usage.ads, validsw.ads, vxaddr2line.adb, widechar.adb, widechar.ads, xeinfo.adb, xnmake.adb, xref_lib.ads, xr_tabls.adb, xr_tabls.ads, xsinfo.adb, xtreeprs.adb, xsnames.adb, vms_conv.ads, vms_conv.adb, a-dirval.ads, a-dirval.adb, a-dirval-mingw.adb, a-direct.ads, a-direct.adb, indepsw.ads, 
prj-attr-pm.ads, system-linux-ppc.ads, a-numaux-darwin.ads, a-numaux-darwin.adb, a-swuwha.ads, a-stunha.ads: Minor reformatting From-SVN: r107016
1388 lines
46 KiB
Ada
1388 lines
46 KiB
Ada
------------------------------------------------------------------------------
|
|
-- --
|
|
-- GNAT COMPILER COMPONENTS --
|
|
-- --
|
|
-- G N A T . R E G E X P --
|
|
-- --
|
|
-- B o d y --
|
|
-- --
|
|
-- Copyright (C) 1999-2005, AdaCore --
|
|
-- --
|
|
-- GNAT is free software; you can redistribute it and/or modify it under --
|
|
-- terms of the GNU General Public License as published by the Free Soft- --
|
|
-- ware Foundation; either version 2, or (at your option) any later ver- --
|
|
-- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
|
|
-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
|
|
-- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
|
|
-- for more details. You should have received a copy of the GNU General --
|
|
-- Public License distributed with GNAT; see file COPYING. If not, write --
|
|
-- to the Free Software Foundation, 51 Franklin Street, Fifth Floor, --
|
|
-- Boston, MA 02110-1301, USA. --
|
|
-- --
|
|
-- As a special exception, if other files instantiate generics from this --
|
|
-- unit, or you link this unit with other files to produce an executable, --
|
|
-- this unit does not by itself cause the resulting executable to be --
|
|
-- covered by the GNU General Public License. This exception does not --
|
|
-- however invalidate any other reasons why the executable file might be --
|
|
-- covered by the GNU Public License. --
|
|
-- --
|
|
-- GNAT was originally developed by the GNAT team at New York University. --
|
|
-- Extensive contributions were provided by Ada Core Technologies Inc. --
|
|
-- --
|
|
------------------------------------------------------------------------------
|
|
|
|
with Unchecked_Deallocation;
|
|
with Ada.Exceptions;
|
|
with GNAT.Case_Util;
|
|
|
|
package body GNAT.Regexp is
|
|
|
|
Open_Paren : constant Character := '(';
|
|
Close_Paren : constant Character := ')';
|
|
Open_Bracket : constant Character := '[';
|
|
Close_Bracket : constant Character := ']';
|
|
|
|
type State_Index is new Natural;
|
|
type Column_Index is new Natural;
|
|
|
|
type Regexp_Array is array
|
|
(State_Index range <>, Column_Index range <>) of State_Index;
|
|
-- First index is for the state number
|
|
-- Second index is for the character type
|
|
-- Contents is the new State
|
|
|
|
type Regexp_Array_Access is access Regexp_Array;
|
|
-- Use this type through the functions Set below, so that it
|
|
-- can grow dynamically depending on the needs.
|
|
|
|
type Mapping is array (Character'Range) of Column_Index;
|
|
-- Mapping between characters and column in the Regexp_Array
|
|
|
|
type Boolean_Array is array (State_Index range <>) of Boolean;
|
|
|
|
type Regexp_Value
|
|
(Alphabet_Size : Column_Index;
|
|
Num_States : State_Index) is
|
|
record
|
|
Map : Mapping;
|
|
States : Regexp_Array (1 .. Num_States, 0 .. Alphabet_Size);
|
|
Is_Final : Boolean_Array (1 .. Num_States);
|
|
Case_Sensitive : Boolean;
|
|
end record;
|
|
-- Deterministic finite-state machine
|
|
|
|
-----------------------
|
|
-- Local Subprograms --
|
|
-----------------------
|
|
|
|
procedure Set
|
|
(Table : in out Regexp_Array_Access;
|
|
State : State_Index;
|
|
Column : Column_Index;
|
|
Value : State_Index);
|
|
-- Sets a value in the table. If the table is too small, reallocate it
|
|
-- dynamically so that (State, Column) is a valid index in it.
|
|
|
|
function Get
|
|
(Table : Regexp_Array_Access;
|
|
State : State_Index;
|
|
Column : Column_Index)
|
|
return State_Index;
|
|
-- Returns the value in the table at (State, Column).
|
|
-- If this index does not exist in the table, returns 0
|
|
|
|
procedure Free is new Unchecked_Deallocation
|
|
(Regexp_Array, Regexp_Array_Access);
|
|
|
|
------------
|
|
-- Adjust --
|
|
------------
|
|
|
|
procedure Adjust (R : in out Regexp) is
   --  Gives R its own copy of the Regexp_Value designated by R.R, so that
   --  R no longer shares that object with the value it was copied from.
   --  A Regexp whose R component is null is left untouched.

   Tmp : Regexp_Access;

begin
   --  A Regexp that holds no compiled automaton has nothing to duplicate,
   --  and reading R.R.Alphabet_Size through a null access must be avoided.

   if R.R /= null then
      Tmp := new Regexp_Value (Alphabet_Size => R.R.Alphabet_Size,
                               Num_States    => R.R.Num_States);
      Tmp.all := R.R.all;
      R.R := Tmp;
   end if;
end Adjust;
|
|
|
|
-------------
|
|
-- Compile --
|
|
-------------
|
|
|
|
function Compile
|
|
(Pattern : String;
|
|
Glob : Boolean := False;
|
|
Case_Sensitive : Boolean := True)
|
|
return Regexp
|
|
is
|
|
S : String := Pattern;
|
|
-- The pattern which is really compiled (when the pattern is case
|
|
-- insensitive, we convert this string to lower case)
|
|
|
|
Map : Mapping := (others => 0);
|
|
-- Mapping between characters and columns in the tables
|
|
|
|
Alphabet_Size : Column_Index := 0;
|
|
-- Number of significant characters in the regular expression.
|
|
-- This total does not include special operators, such as *, (, ...
|
|
|
|
procedure Create_Mapping;
|
|
-- Creates a mapping between characters in the regexp and columns
|
|
-- in the tables representing the regexp. Test that the regexp is
|
|
-- well-formed. Modifies Alphabet_Size and Map.
|
|
|
|
procedure Create_Primary_Table
|
|
(Table : out Regexp_Array_Access;
|
|
Num_States : out State_Index;
|
|
Start_State : out State_Index;
|
|
End_State : out State_Index);
|
|
-- Creates the first version of the regexp (this is a non-deterministic
|
|
-- finite state machine, which is unadapted for a fast pattern
|
|
-- matching algorithm). We use a recursive algorithm to process the
|
|
-- parenthesis sub-expressions.
|
|
--
|
|
-- Table : at the end of the procedure : Column 0 is for any character
|
|
-- ('.') and the last columns are for no character (closure)
|
|
-- Num_States is set to the number of states in the table
|
|
-- Start_State is the number of the starting state in the regexp
|
|
-- End_State is the number of the final state when the regexp matches
|
|
|
|
procedure Create_Primary_Table_Glob
|
|
(Table : out Regexp_Array_Access;
|
|
Num_States : out State_Index;
|
|
Start_State : out State_Index;
|
|
End_State : out State_Index);
|
|
-- Same function as above, but it deals with the second possible
|
|
-- grammar for 'globbing pattern', which is a kind of subset of the
|
|
-- whole regular expression grammar.
|
|
|
|
function Create_Secondary_Table
|
|
(First_Table : Regexp_Array_Access;
|
|
Num_States : State_Index;
|
|
Start_State : State_Index;
|
|
End_State : State_Index)
|
|
return Regexp;
|
|
-- Creates the definitive table representing the regular expression
|
|
-- This is actually a transformation of the primary table First_Table,
|
|
-- where every state is grouped with the states in its 'no-character'
|
|
-- columns. The transitions between the new states are then recalculated
|
|
-- and if necessary some new states are created.
|
|
--
|
|
-- Note that the resulting finite-state machine is not optimized in
|
|
-- terms of the number of states : it would be more time-consuming to
|
|
-- add a third pass to reduce the number of states in the machine, with
|
|
-- no speed improvement...
|
|
|
|
procedure Raise_Exception
|
|
(M : String;
|
|
Index : Integer);
|
|
pragma No_Return (Raise_Exception);
|
|
-- Raise an exception, indicating an error at character Index in S
|
|
|
|
--------------------
|
|
-- Create_Mapping --
|
|
--------------------
|
|
|
|
procedure Create_Mapping is
   --  Scans the pattern S once. Every character that has to be matched
   --  literally receives a column number in Map (Alphabet_Size counts the
   --  columns handed out so far), and the pattern is checked for being
   --  well formed. Any error is reported through Raise_Exception, with
   --  the index in S at which it was detected.

   procedure Add_In_Map (C : Character);
   --  Add a character in the mapping, if it is not already defined

   procedure Check_Index (Index : Integer);
   --  Report an error if Index lies past the last character of S. Must be
   --  called before S (Index) is read whenever Index has been advanced
   --  inside a bracket expression.

   ----------------
   -- Add_In_Map --
   ----------------

   procedure Add_In_Map (C : Character) is
   begin
      --  Column 0 is the initial value of every entry of Map, so a zero
      --  entry means that C has not been given a column yet.

      if Map (C) = 0 then
         Alphabet_Size := Alphabet_Size + 1;
         Map (C) := Alphabet_Size;
      end if;
   end Add_In_Map;

   -----------------
   -- Check_Index --
   -----------------

   procedure Check_Index (Index : Integer) is
   begin
      if Index > S'Last then
         Raise_Exception
           ("Ran out of characters while parsing ", Index);
      end if;
   end Check_Index;

   J                 : Integer := S'First;
   Parenthesis_Level : Integer := 0;
   Curly_Level       : Integer := 0;

--  Start of processing for Create_Mapping

begin
   while J <= S'Last loop
      case S (J) is
         when Open_Bracket =>
            J := J + 1;
            Check_Index (J);

            if S (J) = '^' then
               J := J + 1;
               Check_Index (J);
            end if;

            --  The first character never has a special meaning: a leading
            --  ']' or '-' is an ordinary member of the set. It needs a
            --  column of its own, because Create_Primary_Table stores its
            --  transition at Map (S (J)); without an entry that would be
            --  column 0, the column of the characters that are not part
            --  of the alphabet.

            if S (J) = ']' or else S (J) = '-' then
               Add_In_Map (S (J));
               J := J + 1;
            end if;

            loop
               Check_Index (J);

               exit when S (J) = Close_Bracket;

               --  A '-' that is the last character of S cannot start a
               --  range; it is handled as a plain character below and the
               --  missing ']' is reported on the next iteration.

               if S (J) = '-'
                 and then J < S'Last
                 and then S (J + 1) /= Close_Bracket
               then
                  declare
                     Start : constant Integer := J - 1;

                  begin
                     J := J + 1;

                     if S (J) = '\' then
                        J := J + 1;
                        Check_Index (J);
                     end if;

                     for Char in S (Start) .. S (J) loop
                        Add_In_Map (Char);
                     end loop;
                  end;

               else
                  if S (J) = '\' then
                     J := J + 1;
                     Check_Index (J);
                  end if;

                  Add_In_Map (S (J));
               end if;

               J := J + 1;
            end loop;

         --  A close bracket must follow a open_bracket,
         --  and cannot be found alone on the line

         when Close_Bracket =>
            Raise_Exception
              ("Incorrect character ']' in regular expression", J);

         when '\' =>
            if J < S'Last then
               J := J + 1;
               Add_In_Map (S (J));

            else
               --  \ not allowed at the end of the regexp

               Raise_Exception
                 ("Incorrect character '\' in regular expression", J);
            end if;

         when Open_Paren =>

            --  Parentheses group sub-expressions in a regular expression,
            --  and are plain characters in a globbing pattern.

            if not Glob then
               Parenthesis_Level := Parenthesis_Level + 1;
            else
               Add_In_Map (Open_Paren);
            end if;

         when Close_Paren =>
            if not Glob then
               Parenthesis_Level := Parenthesis_Level - 1;

               if Parenthesis_Level < 0 then
                  Raise_Exception
                    ("')' is not associated with '(' in regular "
                     & "expression", J);
               end if;

               --  J > S'First here, since a ')' in first position has
               --  already been rejected by the test above.

               if S (J - 1) = Open_Paren then
                  Raise_Exception
                    ("Empty parenthesis not allowed in regular "
                     & "expression", J);
               end if;

            else
               Add_In_Map (Close_Paren);
            end if;

         when '.' =>

            --  '.' is a plain character in a globbing pattern only

            if Glob then
               Add_In_Map ('.');
            end if;

         when '{' =>

            --  Curly brackets are plain characters in a regular
            --  expression, and are counted in a globbing pattern.

            if not Glob then
               Add_In_Map (S (J));
            else
               Curly_Level := Curly_Level + 1;
            end if;

         when '}' =>
            if not Glob then
               Add_In_Map (S (J));
            else
               Curly_Level := Curly_Level - 1;
            end if;

         when '*' | '?' =>
            if not Glob then
               if J = S'First then
                  Raise_Exception
                    ("'*', '+', '?' and '|' operators cannot be in "
                     & "first position in regular expression", J);
               end if;
            end if;

         when '|' | '+' =>
            if not Glob then
               if J = S'First then

                  --  These operators must apply to a sub-expression,
                  --  and cannot be found at the beginning of the line

                  Raise_Exception
                    ("'*', '+', '?' and '|' operators cannot be in "
                     & "first position in regular expression", J);
               end if;

            else
               Add_In_Map (S (J));
            end if;

         when others =>
            Add_In_Map (S (J));
      end case;

      J := J + 1;
   end loop;

   --  A closing parenthesis must follow an open parenthesis

   if Parenthesis_Level /= 0 then
      Raise_Exception
        ("'(' must always be associated with a ')'", J);
   end if;

   if Curly_Level /= 0 then
      Raise_Exception
        ("'{' must always be associated with a '}'", J);
   end if;
end Create_Mapping;
|
|
|
|
--------------------------
|
|
-- Create_Primary_Table --
|
|
--------------------------
|
|
|
|
procedure Create_Primary_Table
|
|
(Table : out Regexp_Array_Access;
|
|
Num_States : out State_Index;
|
|
Start_State : out State_Index;
|
|
End_State : out State_Index)
|
|
is
|
|
Empty_Char : constant Column_Index := Alphabet_Size + 1;
|
|
|
|
Current_State : State_Index := 0;
|
|
-- Index of the last created state
|
|
|
|
procedure Add_Empty_Char
|
|
(State : State_Index;
|
|
To_State : State_Index);
|
|
-- Add a empty-character transition from State to To_State
|
|
|
|
procedure Create_Repetition
|
|
(Repetition : Character;
|
|
Start_Prev : State_Index;
|
|
End_Prev : State_Index;
|
|
New_Start : out State_Index;
|
|
New_End : in out State_Index);
|
|
-- Create the table in case we have a '*', '+' or '?'.
|
|
-- Start_Prev .. End_Prev should indicate respectively the start and
|
|
-- end index of the previous expression, to which '*', '+' or '?' is
|
|
-- applied.
|
|
|
|
procedure Create_Simple
|
|
(Start_Index : Integer;
|
|
End_Index : Integer;
|
|
Start_State : out State_Index;
|
|
End_State : out State_Index);
|
|
-- Fill the table for the regexp Simple.
|
|
-- This is the recursive procedure called to handle () expressions
|
|
-- If End_State = 0, then the call to Create_Simple creates an
|
|
-- independent regexp, not a concatenation
|
|
-- Start_Index .. End_Index is the starting index in the string S.
|
|
--
|
|
-- Warning: it may look like we are creating too many empty-string
|
|
-- transitions, but they are needed to get the correct regexp.
|
|
-- The table is filled as follow ( s means start-state, e means
|
|
-- end-state) :
|
|
--
|
|
-- regexp state_num | a b * empty_string
|
|
-- ------- ------------------------------
|
|
-- a 1 (s) | 2 - - -
|
|
-- 2 (e) | - - - -
|
|
--
|
|
-- ab 1 (s) | 2 - - -
|
|
-- 2 | - - - 3
|
|
-- 3 | - 4 - -
|
|
-- 4 (e) | - - - -
|
|
--
|
|
-- a|b 1 | 2 - - -
|
|
-- 2 | - - - 6
|
|
-- 3 | - 4 - -
|
|
-- 4 | - - - 6
|
|
-- 5 (s) | - - - 1,3
|
|
-- 6 (e) | - - - -
|
|
--
|
|
-- a* 1 | 2 - - -
|
|
-- 2 | - - - 4
|
|
-- 3 (s) | - - - 1,4
|
|
-- 4 (e) | - - - 3
|
|
--
|
|
-- (a) 1 (s) | 2 - - -
|
|
-- 2 (e) | - - - -
|
|
--
|
|
-- a+ 1 | 2 - - -
|
|
-- 2 | - - - 4
|
|
-- 3 (s) | - - - 1
|
|
-- 4 (e) | - - - 3
|
|
--
|
|
-- a? 1 | 2 - - -
|
|
-- 2 | - - - 4
|
|
-- 3 (s) | - - - 1,4
|
|
-- 4 (e) | - - - -
|
|
--
|
|
-- . 1 (s) | 2 2 2 -
|
|
-- 2 (e) | - - - -
|
|
|
|
function Next_Sub_Expression
|
|
(Start_Index : Integer;
|
|
End_Index : Integer)
|
|
return Integer;
|
|
-- Returns the index of the last character of the next sub-expression
|
|
-- in Simple. Index cannot be greater than End_Index.
|
|
|
|
--------------------
|
|
-- Add_Empty_Char --
|
|
--------------------
|
|
|
|
procedure Add_Empty_Char
|
|
(State : State_Index;
|
|
To_State : State_Index)
|
|
is
|
|
J : Column_Index := Empty_Char;
|
|
|
|
begin
|
|
while Get (Table, State, J) /= 0 loop
|
|
J := J + 1;
|
|
end loop;
|
|
|
|
Set (Table, State, J, To_State);
|
|
end Add_Empty_Char;
|
|
|
|
-----------------------
|
|
-- Create_Repetition --
|
|
-----------------------
|
|
|
|
procedure Create_Repetition
|
|
(Repetition : Character;
|
|
Start_Prev : State_Index;
|
|
End_Prev : State_Index;
|
|
New_Start : out State_Index;
|
|
New_End : in out State_Index)
|
|
is
|
|
begin
|
|
New_Start := Current_State + 1;
|
|
|
|
if New_End /= 0 then
|
|
Add_Empty_Char (New_End, New_Start);
|
|
end if;
|
|
|
|
Current_State := Current_State + 2;
|
|
New_End := Current_State;
|
|
|
|
Add_Empty_Char (End_Prev, New_End);
|
|
Add_Empty_Char (New_Start, Start_Prev);
|
|
|
|
if Repetition /= '+' then
|
|
Add_Empty_Char (New_Start, New_End);
|
|
end if;
|
|
|
|
if Repetition /= '?' then
|
|
Add_Empty_Char (New_End, New_Start);
|
|
end if;
|
|
end Create_Repetition;
|
|
|
|
-------------------
|
|
-- Create_Simple --
|
|
-------------------
|
|
|
|
procedure Create_Simple
|
|
(Start_Index : Integer;
|
|
End_Index : Integer;
|
|
Start_State : out State_Index;
|
|
End_State : out State_Index)
|
|
is
|
|
J : Integer := Start_Index;
|
|
Last_Start : State_Index := 0;
|
|
|
|
begin
|
|
Start_State := 0;
|
|
End_State := 0;
|
|
while J <= End_Index loop
|
|
case S (J) is
|
|
when Open_Paren =>
|
|
declare
|
|
J_Start : constant Integer := J + 1;
|
|
Next_Start : State_Index;
|
|
Next_End : State_Index;
|
|
|
|
begin
|
|
J := Next_Sub_Expression (J, End_Index);
|
|
Create_Simple (J_Start, J - 1, Next_Start, Next_End);
|
|
|
|
if J < End_Index
|
|
and then (S (J + 1) = '*' or else
|
|
S (J + 1) = '+' or else
|
|
S (J + 1) = '?')
|
|
then
|
|
J := J + 1;
|
|
Create_Repetition
|
|
(S (J),
|
|
Next_Start,
|
|
Next_End,
|
|
Last_Start,
|
|
End_State);
|
|
|
|
else
|
|
Last_Start := Next_Start;
|
|
|
|
if End_State /= 0 then
|
|
Add_Empty_Char (End_State, Last_Start);
|
|
end if;
|
|
|
|
End_State := Next_End;
|
|
end if;
|
|
end;
|
|
|
|
when '|' =>
|
|
declare
|
|
Start_Prev : constant State_Index := Start_State;
|
|
End_Prev : constant State_Index := End_State;
|
|
Start_J : constant Integer := J + 1;
|
|
Start_Next : State_Index := 0;
|
|
End_Next : State_Index := 0;
|
|
|
|
begin
|
|
J := Next_Sub_Expression (J, End_Index);
|
|
|
|
-- Create a new state for the start of the alternative
|
|
|
|
Current_State := Current_State + 1;
|
|
Last_Start := Current_State;
|
|
Start_State := Last_Start;
|
|
|
|
-- Create the tree for the second part of alternative
|
|
|
|
Create_Simple (Start_J, J, Start_Next, End_Next);
|
|
|
|
-- Create the end state
|
|
|
|
Add_Empty_Char (Last_Start, Start_Next);
|
|
Add_Empty_Char (Last_Start, Start_Prev);
|
|
Current_State := Current_State + 1;
|
|
End_State := Current_State;
|
|
Add_Empty_Char (End_Prev, End_State);
|
|
Add_Empty_Char (End_Next, End_State);
|
|
end;
|
|
|
|
when Open_Bracket =>
|
|
Current_State := Current_State + 1;
|
|
|
|
declare
|
|
Next_State : State_Index := Current_State + 1;
|
|
|
|
begin
|
|
J := J + 1;
|
|
|
|
if S (J) = '^' then
|
|
J := J + 1;
|
|
|
|
Next_State := 0;
|
|
|
|
for Column in 0 .. Alphabet_Size loop
|
|
Set (Table, Current_State, Column,
|
|
Value => Current_State + 1);
|
|
end loop;
|
|
end if;
|
|
|
|
-- Automatically add the first character
|
|
|
|
if S (J) = '-' or S (J) = ']' then
|
|
Set (Table, Current_State, Map (S (J)),
|
|
Value => Next_State);
|
|
J := J + 1;
|
|
end if;
|
|
|
|
-- Loop till closing bracket found
|
|
|
|
loop
|
|
exit when S (J) = Close_Bracket;
|
|
|
|
if S (J) = '-'
|
|
and then S (J + 1) /= ']'
|
|
then
|
|
declare
|
|
Start : constant Integer := J - 1;
|
|
|
|
begin
|
|
J := J + 1;
|
|
|
|
if S (J) = '\' then
|
|
J := J + 1;
|
|
end if;
|
|
|
|
for Char in S (Start) .. S (J) loop
|
|
Set (Table, Current_State, Map (Char),
|
|
Value => Next_State);
|
|
end loop;
|
|
end;
|
|
|
|
else
|
|
if S (J) = '\' then
|
|
J := J + 1;
|
|
end if;
|
|
|
|
Set (Table, Current_State, Map (S (J)),
|
|
Value => Next_State);
|
|
end if;
|
|
J := J + 1;
|
|
end loop;
|
|
end;
|
|
|
|
Current_State := Current_State + 1;
|
|
|
|
-- If the next symbol is a special symbol
|
|
|
|
if J < End_Index
|
|
and then (S (J + 1) = '*' or else
|
|
S (J + 1) = '+' or else
|
|
S (J + 1) = '?')
|
|
then
|
|
J := J + 1;
|
|
Create_Repetition
|
|
(S (J),
|
|
Current_State - 1,
|
|
Current_State,
|
|
Last_Start,
|
|
End_State);
|
|
|
|
else
|
|
Last_Start := Current_State - 1;
|
|
|
|
if End_State /= 0 then
|
|
Add_Empty_Char (End_State, Last_Start);
|
|
end if;
|
|
|
|
End_State := Current_State;
|
|
end if;
|
|
|
|
when '*' | '+' | '?' | Close_Paren | Close_Bracket =>
|
|
Raise_Exception
|
|
("Incorrect character in regular expression :", J);
|
|
|
|
when others =>
|
|
Current_State := Current_State + 1;
|
|
|
|
-- Create the state for the symbol S (J)
|
|
|
|
if S (J) = '.' then
|
|
for K in 0 .. Alphabet_Size loop
|
|
Set (Table, Current_State, K,
|
|
Value => Current_State + 1);
|
|
end loop;
|
|
|
|
else
|
|
if S (J) = '\' then
|
|
J := J + 1;
|
|
end if;
|
|
|
|
Set (Table, Current_State, Map (S (J)),
|
|
Value => Current_State + 1);
|
|
end if;
|
|
|
|
Current_State := Current_State + 1;
|
|
|
|
-- If the next symbol is a special symbol
|
|
|
|
if J < End_Index
|
|
and then (S (J + 1) = '*' or else
|
|
S (J + 1) = '+' or else
|
|
S (J + 1) = '?')
|
|
then
|
|
J := J + 1;
|
|
Create_Repetition
|
|
(S (J),
|
|
Current_State - 1,
|
|
Current_State,
|
|
Last_Start,
|
|
End_State);
|
|
|
|
else
|
|
Last_Start := Current_State - 1;
|
|
|
|
if End_State /= 0 then
|
|
Add_Empty_Char (End_State, Last_Start);
|
|
end if;
|
|
|
|
End_State := Current_State;
|
|
end if;
|
|
|
|
end case;
|
|
|
|
if Start_State = 0 then
|
|
Start_State := Last_Start;
|
|
end if;
|
|
|
|
J := J + 1;
|
|
end loop;
|
|
end Create_Simple;
|
|
|
|
-------------------------
|
|
-- Next_Sub_Expression --
|
|
-------------------------
|
|
|
|
function Next_Sub_Expression
  (Start_Index : Integer;
   End_Index   : Integer)
   return        Integer
is
   --  Scan S forward from Start_Index and return the index at which the
   --  sub-expression beginning there stops:
   --
   --    * the index of the first unnested Close_Paren encountered, or
   --    * the index just before the next '|' when the scan itself began
   --      on a '|', or
   --    * End_Index when neither of the above is found.
   --
   --  Escaped characters and bracketed sets are skipped, and nested
   --  parenthesized groups are skipped by recursion.

   Pos : Integer := Start_Index;
   --  Index of the character currently examined

   Began_On_Bar : constant Boolean := S (Start_Index) = '|';
   --  True when the scan starts on an alternation bar; only then does a
   --  later '|' end the sub-expression.

begin
   while Pos /= End_Index loop
      Pos := Pos + 1;

      case S (Pos) is

         --  Escaped character: step over the character that follows

         when '\' =>
            Pos := Pos + 1;

         --  Bracketed set: advance to its closing bracket, stepping over
         --  escaped characters inside the set.

         when Open_Bracket =>
            loop
               Pos := Pos + 1;
               exit when S (Pos) = Close_Bracket;

               if S (Pos) = '\' then
                  Pos := Pos + 1;
               end if;
            end loop;

         --  Nested group: resume scanning from where the recursive call
         --  stopped.

         when Open_Paren =>
            Pos := Next_Sub_Expression (Pos, End_Index);

         when Close_Paren =>
            return Pos;

         when '|' =>
            if Began_On_Bar then
               return Pos - 1;
            end if;

         when others =>
            null;
      end case;
   end loop;

   return Pos;
end Next_Sub_Expression;
|
|
|
|
-- Start of Create_Primary_Table
|
|
|
|
begin
|
|
Table.all := (others => (others => 0));
|
|
Create_Simple (S'First, S'Last, Start_State, End_State);
|
|
Num_States := Current_State;
|
|
end Create_Primary_Table;
|
|
|
|
-------------------------------
|
|
-- Create_Primary_Table_Glob --
|
|
-------------------------------
|
|
|
|
procedure Create_Primary_Table_Glob
  (Table       : out Regexp_Array_Access;
   Num_States  : out State_Index;
   Start_State : out State_Index;
   End_State   : out State_Index)
is
   --  Fill Table with the primary transition table for S interpreted as
   --  a globbing pattern. The constructs handled are bracketed sets
   --  ("[...]", optionally negated with '^' and containing ranges),
   --  brace alternatives ("{a,b}"), '*', '?' and '\' escapes.
   --
   --  Table       : cleared, then filled (Set may reallocate it)
   --  Num_States  : index of the last state created
   --  Start_State : first state of the automaton built for S
   --  End_State   : last state of the automaton built for S

   Empty_Char : constant Column_Index := Alphabet_Size + 1;
   --  First column used to store empty-character transitions

   Current_State : State_Index := 0;
   --  Index of the last created state

   procedure Add_Empty_Char
     (State    : State_Index;
      To_State : State_Index);
   --  Add an empty-character transition from State to To_State

   procedure Create_Simple
     (Start_Index : Integer;
      End_Index   : Integer;
      Start_State : out State_Index;
      End_State   : out State_Index);
   --  Fill the table for S (Start_Index .. End_Index) and return the
   --  first and last states created for that slice. This procedure
   --  calls itself recursively for each alternative of a {} expression.

   --------------------
   -- Add_Empty_Char --
   --------------------

   procedure Add_Empty_Char
     (State    : State_Index;
      To_State : State_Index)
   is
      J : Column_Index := Empty_Char;

   begin
      --  Look for the first unused empty-transition column of State. Get
      --  returns 0 for columns beyond the current table bounds, so the
      --  search always ends, and Set enlarges the table when needed.

      while Get (Table, State, J) /= 0 loop
         J := J + 1;
      end loop;

      Set (Table, State, J,
           Value => To_State);
   end Add_Empty_Char;

   -------------------
   -- Create_Simple --
   -------------------

   procedure Create_Simple
     (Start_Index : Integer;
      End_Index   : Integer;
      Start_State : out State_Index;
      End_State   : out State_Index)
   is
      J : Integer := Start_Index;
      --  Index in S of the character being processed

      Last_Start : State_Index := 0;
      --  First state of the most recently created construct

   begin
      Start_State := 0;
      End_State   := 0;

      while J <= End_Index loop
         case S (J) is

            when Open_Bracket =>
               Current_State := Current_State + 1;

               declare
                  Next_State : State_Index := Current_State + 1;
                  --  Target of the transitions for the characters listed
                  --  in the set; 0 (no transition) for a negated set.

               begin
                  J := J + 1;

                  if S (J) = '^' then
                     J := J + 1;
                     Next_State := 0;

                     --  Negated set: accept every character by default,
                     --  the listed ones are removed below.

                     for Column in 0 .. Alphabet_Size loop
                        Set (Table, Current_State, Column,
                             Value => Current_State + 1);
                     end loop;
                  end if;

                  --  Automatically add the first character. A leading
                  --  '-' or ']' is an ordinary member of the set, so it
                  --  must use the same target state as every other
                  --  member (Next_State). Targeting Current_State would
                  --  create a self-loop instead: "[-]" could then never
                  --  match "-" and "[^]]" would not reject "]".

                  if S (J) = '-' or else S (J) = ']' then
                     Set (Table, Current_State, Map (S (J)),
                          Value => Next_State);
                     J := J + 1;
                  end if;

                  --  Loop till closing bracket found

                  loop
                     exit when S (J) = Close_Bracket;

                     if S (J) = '-'
                       and then S (J + 1) /= ']'
                     then
                        --  Range: the lower bound is the character just
                        --  before the '-', the upper bound the
                        --  (possibly escaped) character after it.

                        declare
                           Start : constant Integer := J - 1;

                        begin
                           J := J + 1;

                           if S (J) = '\' then
                              J := J + 1;
                           end if;

                           for Char in S (Start) .. S (J) loop
                              Set (Table, Current_State, Map (Char),
                                   Value => Next_State);
                           end loop;
                        end;

                     else
                        if S (J) = '\' then
                           J := J + 1;
                        end if;

                        Set (Table, Current_State, Map (S (J)),
                             Value => Next_State);
                     end if;

                     J := J + 1;
                  end loop;
               end;

               Last_Start := Current_State;
               Current_State := Current_State + 1;

               --  Chain the previous construct to this one

               if End_State /= 0 then
                  Add_Empty_Char (End_State, Last_Start);
               end if;

               End_State := Current_State;

            when '{' =>
               declare
                  End_Sub          : Integer;
                  Start_Regexp_Sub : State_Index;
                  End_Regexp_Sub   : State_Index;
                  Create_Start     : State_Index := 0;

                  Create_End : State_Index := 0;
                  --  Initialized to avoid junk warning

               begin
                  --  Each iteration handles one alternative; J is on the
                  --  '{' or ',' that precedes it.

                  while S (J) /= '}' loop

                     --  First step : find sub pattern

                     End_Sub := J + 1;
                     while S (End_Sub) /= ','
                       and then S (End_Sub) /= '}'
                     loop
                        End_Sub := End_Sub + 1;
                     end loop;

                     --  Second step : create a sub pattern

                     Create_Simple
                       (J + 1,
                        End_Sub - 1,
                        Start_Regexp_Sub,
                        End_Regexp_Sub);

                     J := End_Sub;

                     --  Third step : create an alternative

                     if Create_Start = 0 then

                        --  First alternative: create the common entry
                        --  and exit states of the whole {} expression.

                        Current_State := Current_State + 1;
                        Create_Start := Current_State;
                        Add_Empty_Char (Create_Start, Start_Regexp_Sub);
                        Current_State := Current_State + 1;
                        Create_End := Current_State;
                        Add_Empty_Char (End_Regexp_Sub, Create_End);

                     else
                        --  Later alternative: a new entry state leads
                        --  both to the previous entry state and to this
                        --  alternative, which joins the common exit.

                        Current_State := Current_State + 1;
                        Add_Empty_Char (Current_State, Create_Start);
                        Create_Start := Current_State;
                        Add_Empty_Char (Create_Start, Start_Regexp_Sub);
                        Add_Empty_Char (End_Regexp_Sub, Create_End);
                     end if;
                  end loop;

                  if End_State /= 0 then
                     Add_Empty_Char (End_State, Create_Start);
                  end if;

                  End_State := Create_End;
                  Last_Start := Create_Start;
               end;

            when '*' =>

               --  Four states are created: an entry state, a state with
               --  a transition on every character, and two states
               --  linked by empty transitions, the last one looping
               --  back to the entry state. The entry state also has an
               --  empty transition straight to the last state.

               Current_State := Current_State + 1;

               if End_State /= 0 then
                  Add_Empty_Char (End_State, Current_State);
               end if;

               Add_Empty_Char (Current_State, Current_State + 1);
               Add_Empty_Char (Current_State, Current_State + 3);
               Last_Start := Current_State;

               Current_State := Current_State + 1;

               for K in 0 .. Alphabet_Size loop
                  Set (Table, Current_State, K,
                       Value => Current_State + 1);
               end loop;

               Current_State := Current_State + 1;
               Add_Empty_Char (Current_State, Current_State + 1);

               Current_State := Current_State + 1;
               Add_Empty_Char (Current_State, Last_Start);
               End_State := Current_State;

            when others =>
               Current_State := Current_State + 1;

               if S (J) = '?' then

                  --  '?' has a transition on every character

                  for K in 0 .. Alphabet_Size loop
                     Set (Table, Current_State, K,
                          Value => Current_State + 1);
                  end loop;

               else
                  if S (J) = '\' then
                     J := J + 1;
                  end if;

                  --  Create the state for the symbol S (J)

                  Set (Table, Current_State, Map (S (J)),
                       Value => Current_State + 1);
               end if;

               Last_Start := Current_State;
               Current_State := Current_State + 1;

               if End_State /= 0 then
                  Add_Empty_Char (End_State, Last_Start);
               end if;

               End_State := Current_State;

         end case;

         --  The first construct created gives the start state

         if Start_State = 0 then
            Start_State := Last_Start;
         end if;

         J := J + 1;
      end loop;
   end Create_Simple;

--  Start of processing for Create_Primary_Table_Glob

begin
   Table.all := (others => (others => 0));
   Create_Simple (S'First, S'Last, Start_State, End_State);
   Num_States := Current_State;
end Create_Primary_Table_Glob;
|
|
|
|
----------------------------
|
|
-- Create_Secondary_Table --
|
|
----------------------------
|
|
|
|
function Create_Secondary_Table
  (First_Table : Regexp_Array_Access;
   Num_States  : State_Index;
   Start_State : State_Index;
   End_State   : State_Index) return Regexp
is
   --  Build the final regexp from the primary table First_Table. Every
   --  state of the result (a "meta-state") stands for a set of primary
   --  states, closed under empty-character transitions. The result
   --  table has one column per character class (0 .. Alphabet_Size)
   --  and no empty-character columns.
   --
   --  Start_State and End_State are the first and last states of the
   --  primary table. Num_States is not used.

   pragma Warnings (Off, Num_States);

   Last_Index : constant State_Index := First_Table'Last (1);
   type Meta_State is array (1 .. Last_Index) of Boolean;
   --  Set of primary states, one flag per primary state

   Det_Table : Regexp_Array (1 .. Last_Index, 0 .. Alphabet_Size) :=
                 (others => (others => 0));
   --  Transitions between meta-states; 0 means no transition

   Subsets : array (1 .. Last_Index + 1) of Meta_State :=
               (others => (others => False));
   --  Meta-states found so far. The slot just after the last known one
   --  is used as scratch space for the candidate being computed.

   Has_Move : Boolean;
   --  True when the candidate meta-state has at least one member

   Final_Flags : Boolean_Array (1 .. Last_Index) := (others => False);
   --  Final_Flags (M) is True when meta-state M contains End_State

   Work_State : State_Index := 1;
   --  Meta-state whose outgoing transitions are being computed

   State_Count : State_Index := 1;
   --  Number of meta-states created so far

   procedure Closure
     (State : in out Meta_State;
      Item  : State_Index);
   --  Add Item to State, together with every primary state that can be
   --  reached from Item through empty-character transitions.

   -------------
   -- Closure --
   -------------

   procedure Closure
     (State : in out Meta_State;
      Item  : State_Index)
   is
   begin
      --  Nothing to do when Item is already a member; this is also what
      --  stops the recursion on cycles of empty transitions.

      if not State (Item) then
         State (Item) := True;

         --  Empty transitions are stored contiguously in the columns
         --  after the alphabet; the first 0 marks the end of the list.

         for Column in Alphabet_Size + 1 .. First_Table'Last (2) loop
            exit when First_Table (Item, Column) = 0;
            Closure (State, First_Table (Item, Column));
         end loop;
      end if;
   end Closure;

--  Start of processing for Create_Secondary_Table

begin
   --  The first meta-state is the closure of the primary start state

   Closure (Subsets (Work_State), Start_State);

   while Work_State <= State_Count loop

      --  A meta-state that includes the primary table end state is a
      --  final state of the regexp.

      if Subsets (Work_State)(End_State) then
         Final_Flags (Work_State) := True;
      end if;

      --  For every character class, compute the meta-state reached
      --  from Work_State.

      for Column in 0 .. Alphabet_Size loop
         Subsets (State_Count + 1) := (others => False);
         Has_Move := False;

         for Member in Meta_State'Range loop
            if Subsets (Work_State)(Member)
              and then First_Table (Member, Column) /= 0
            then
               Closure
                 (Subsets (State_Count + 1),
                  First_Table (Member, Column));
               Has_Move := True;
            end if;
         end loop;

         --  If at least one transition existed

         if Has_Move then

            --  Reuse an existing meta-state when the candidate is
            --  equal to one of them.

            for Known in 1 .. State_Count loop
               if Subsets (Known) = Subsets (State_Count + 1) then
                  Det_Table (Work_State, Column) := Known;
                  exit;
               end if;
            end loop;

            --  Otherwise the candidate becomes a new meta-state

            if Det_Table (Work_State, Column) = 0 then
               State_Count := State_Count + 1;
               Det_Table (Work_State, Column) := State_Count;
            end if;
         end if;
      end loop;

      Work_State := Work_State + 1;
   end loop;

   --  Build and return the regexp

   declare
      Result : constant Regexp_Access :=
                 new Regexp_Value (Alphabet_Size => Alphabet_Size,
                                   Num_States    => State_Count);

   begin
      Result.Map            := Map;
      Result.Is_Final       := Final_Flags (1 .. State_Count);
      Result.Case_Sensitive := Case_Sensitive;

      for Row in 1 .. State_Count loop
         for Col in 0 .. Alphabet_Size loop
            Result.States (Row, Col) := Det_Table (Row, Col);
         end loop;
      end loop;

      return (Ada.Finalization.Controlled with R => Result);
   end;
end Create_Secondary_Table;
|
|
|
|
---------------------
|
|
-- Raise_Exception --
|
|
---------------------
|
|
|
|
procedure Raise_Exception
  (M     : String;
   Index : Integer)
is
   --  Raise Error_In_Regexp with a message made of M followed by the
   --  offset Index at which the error was detected.

begin
   --  Index'Img already starts with a blank for non-negative values, so
   --  "offset" must not be followed by another one; otherwise the
   --  message contains two consecutive blanks before the number.

   Ada.Exceptions.Raise_Exception
     (Error_In_Regexp'Identity, M & " at offset" & Index'Img);
end Raise_Exception;
|
|
|
|
-- Start of processing for Compile
|
|
|
|
begin
|
|
-- Special case for the empty string: it always matches, and the
|
|
-- following processing would fail on it.
|
|
if S = "" then
|
|
return (Ada.Finalization.Controlled with
|
|
R => new Regexp_Value'
|
|
(Alphabet_Size => 0,
|
|
Num_States => 1,
|
|
Map => (others => 0),
|
|
States => (others => (others => 1)),
|
|
Is_Final => (others => True),
|
|
Case_Sensitive => True));
|
|
end if;
|
|
|
|
if not Case_Sensitive then
|
|
GNAT.Case_Util.To_Lower (S);
|
|
end if;
|
|
|
|
Create_Mapping;
|
|
|
|
-- Creates the primary table
|
|
|
|
declare
|
|
Table : Regexp_Array_Access;
|
|
Num_States : State_Index;
|
|
Start_State : State_Index;
|
|
End_State : State_Index;
|
|
R : Regexp;
|
|
|
|
begin
|
|
Table := new Regexp_Array (1 .. 100,
|
|
0 .. Alphabet_Size + 10);
|
|
if not Glob then
|
|
Create_Primary_Table (Table, Num_States, Start_State, End_State);
|
|
else
|
|
Create_Primary_Table_Glob
|
|
(Table, Num_States, Start_State, End_State);
|
|
end if;
|
|
|
|
-- Creates the secondary table
|
|
|
|
R := Create_Secondary_Table
|
|
(Table, Num_States, Start_State, End_State);
|
|
Free (Table);
|
|
return R;
|
|
end;
|
|
end Compile;
|
|
|
|
--------------
|
|
-- Finalize --
|
|
--------------
|
|
|
|
procedure Finalize (R : in out Regexp) is

   --  Deallocate the Regexp_Value designated by R.R

   procedure Release is
     new Unchecked_Deallocation (Regexp_Value, Regexp_Access);

begin
   Release (R.R);
end Finalize;
|
|
|
|
---------
|
|
-- Get --
|
|
---------
|
|
|
|
function Get
  (Table  : Regexp_Array_Access;
   State  : State_Index;
   Column : Column_Index) return State_Index
is
   --  Return Table (State, Column), or 0 when State or Column lies
   --  beyond the upper bounds of Table.

begin
   if State > Table'Last (1)
     or else Column > Table'Last (2)
   then
      return 0;
   end if;

   return Table (State, Column);
end Get;
|
|
|
|
-----------
|
|
-- Match --
|
|
-----------
|
|
|
|
function Match (S : String; R : Regexp) return Boolean is

   --  Return True if the whole of S is accepted by R. Constraint_Error
   --  is raised when R holds no compiled expression (R.R is null).

   Here : State_Index := 1;
   --  Current state of the automaton; matching starts in state 1

   Symbol : Character;
   --  Character of S being consumed, folded to lower case when R is
   --  not case sensitive.

begin
   if R.R = null then
      raise Constraint_Error;
   end if;

   for Pos in S'Range loop
      Symbol := S (Pos);

      if not R.R.Case_Sensitive then
         Symbol := GNAT.Case_Util.To_Lower (Symbol);
      end if;

      Here := R.R.States (Here, R.R.Map (Symbol));

      --  State 0 means that no transition exists for this character

      if Here = 0 then
         return False;
      end if;
   end loop;

   return R.R.Is_Final (Here);
end Match;
|
|
|
|
---------
|
|
-- Set --
|
|
---------
|
|
|
|
procedure Set
  (Table  : in out Regexp_Array_Access;
   State  : State_Index;
   Column : Column_Index;
   Value  : State_Index)
is
   --  Store Value in Table (State, Column). When State or Column lies
   --  beyond the upper bounds of Table, a larger table is allocated
   --  first, the existing contents are copied into it, the old table is
   --  freed and Table is updated to designate the new one.

begin
   if State > Table'Last (1)
     or else Column > Table'Last (2)
   then
      declare
         --  Each upper bound becomes the multiple of the current bound
         --  that follows the requested index, which makes
         --  (State, Column) a valid index of the new table.

         Rows : constant State_Index :=
                  Table'Last (1) * (State / Table'Last (1) + 1);

         Cols : constant Column_Index :=
                  Table'Last (2) * (Column / Table'Last (2) + 1);

         Bigger : constant Regexp_Array_Access :=
                    new Regexp_Array (Table'First (1) .. Rows,
                                      Table'First (2) .. Cols);

      begin
         Bigger.all := (others => (others => 0));

         for Row in Table'Range (1) loop
            for Col in Table'Range (2) loop
               Bigger (Row, Col) := Table (Row, Col);
            end loop;
         end loop;

         Free (Table);
         Table := Bigger;
      end;
   end if;

   Table (State, Column) := Value;
end Set;
|
|
|
|
end GNAT.Regexp;
|