2002-02-28  Isamu Hasegawa  <isamu@yamato.ibm.com>

	* posix/regcomp.c (regcomp): Remove a redundant condition.
	(init_word_char): Add a check on malloc failure.
	(create_initial_state): Likewise.
	(duplicate_node): Likewise.
	(calc_eclosure): Likewise.
	(calc_eclosure_iter): Likewise.
	(parse_expression): Likewise.
	(parse_bracket_exp): Remove unnecessary malloc invocations.
	(build_equiv_class): Likewise.
	(build_charclass): Likewise.
	* posix/regex_internal.c (re_node_set_intersect): Add a check
	on malloc failure.
	(re_node_set_add_intersect): Likewise.
	(re_node_set_merge): Likewise.
	(re_acquire_state): Likewise.
	(re_acquire_state_context): Likewise.
	(create_newstate_common): Likewise.
	(register_state): Likewise.
	(create_ci_newstate): Likewise.
	(create_cd_newstate): Likewise.
	* posix/regex_internal.h: Fix prototypes of re_acquire_state
	and re_acquire_state_context.
	* posix/regexec.c (regexec): Suit it to the error handling of
	re_search_internal.
	(re_match): Likewise.
	(re_search): Likewise.
	(re_search_internal): Add a check on malloc failure.
	(acquire_init_state_context): Likewise.
	(check_matching): Likewise.
	(proceed_next_node): Likewise.
	(set_regs): Likewise.
	(sift_states_backward): Likewise.
	(sift_states_iter_bkref): Likewise.
	(add_epsilon_backreference): Likewise.
	(transit_state): Likewise.
	(transit_state_sb): Likewise.
	(transit_state_mb): Likewise.
	(transit_state_bkref_loop): Likewise.
	(build_trtable): Likewise.
	(group_nodes_into_DFAstates): Likewise.
	(match_ctx_init): Likewise.
	(match_ctx_add_entry): Likewise.
This commit is contained in:
Ulrich Drepper 2002-02-28 07:43:13 +00:00
parent 51f38e87b1
commit a9388965cc
5 changed files with 612 additions and 281 deletions

View File

@ -1,3 +1,48 @@
2002-02-28 Isamu Hasegawa <isamu@yamato.ibm.com>
* posix/regcomp.c (regcomp): Remove a redundant condition.
(init_word_char): Add a check on malloc failure.
(create_initial_state): Likewise.
(duplicate_node): Likewise.
(calc_eclosure): Likewise.
(calc_eclosure_iter): Likewise.
(parse_expression): Likewise.
(parse_bracket_exp): Remove unnecessary malloc invocations.
(build_equiv_class): Likewise.
(build_charclass): Likewise.
* posix/regex_internal.c (re_node_set_intersect): Add a check
on malloc failure.
(re_node_set_add_intersect): Likewise.
(re_node_set_merge): Likewise.
(re_acquire_state): Likewise.
(re_acquire_state_context): Likewise.
(create_newstate_common): Likewise.
(register_state): Likewise.
(create_ci_newstate): Likewise.
(create_cd_newstate): Likewise.
* posix/regex_internal.h: Fix prototypes of re_acquire_state
and re_acquire_state_context.
* posix/regexec.c (regexec): Suit it to the error handling of
re_search_internal.
(re_match): Likewise.
(re_search): Likewise.
(re_search_internal): Add a check on malloc failure.
(acquire_init_state_context): Likewise.
(check_matching): Likewise.
(proceed_next_node): Likewise.
(set_regs): Likewise.
(sift_states_backward): Likewise.
(sift_states_iter_bkref): Likewise.
(add_epsilon_backreference): Likewise.
(transit_state): Likewise.
(transit_state_sb): Likewise.
(transit_state_mb): Likewise.
(transit_state_bkref_loop): Likewise.
(build_trtable): Likewise.
(group_nodes_into_DFAstates): Likewise.
(match_ctx_init): Likewise.
(match_ctx_add_entry): Likewise.
2002-02-27 Ulrich Drepper <drepper@redhat.com>
* elf/dl-load.c (_dl_map_object_from_fd): Always add SONAME to

View File

@ -63,7 +63,7 @@ static void re_compile_fastmap_iter (regex_t *bufp,
const re_dfastate_t *init_state,
char *fastmap);
static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len);
static void init_word_char (re_dfa_t *dfa);
static reg_errcode_t init_word_char (re_dfa_t *dfa);
static void free_charset (re_charset_t *cset);
static void free_workarea_compile (regex_t *preg);
static reg_errcode_t create_initial_state (re_dfa_t *dfa);
@ -72,10 +72,11 @@ static reg_errcode_t analyze_tree (re_dfa_t *dfa, bin_tree_t *node);
static void calc_first (re_dfa_t *dfa, bin_tree_t *node);
static void calc_next (re_dfa_t *dfa, bin_tree_t *node);
static void calc_epsdest (re_dfa_t *dfa, bin_tree_t *node);
static int duplicate_node (re_dfa_t *dfa, int org_idx,
unsigned int constraint);
static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx,
unsigned int constraint);
static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
static re_node_set calc_eclosure_iter (re_dfa_t *dfa, int node, int root);
static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
int node, int root);
static void calc_inveclosure (re_dfa_t *dfa);
static int fetch_number (re_string_t *input, re_token_t *token,
reg_syntax_t syntax);
@ -446,8 +447,8 @@ regcomp (preg, pattern, cflags)
if (ret == REG_ERPAREN)
ret = REG_EPAREN;
/* XXX Why the test for preg->fastmap != NULL? */
if (ret == REG_NOERROR && preg->fastmap != NULL)
/* We have already checked preg->fastmap != NULL. */
if (ret == REG_NOERROR)
{
/* Compute the fastmap now, since regexec cannot modify the pattern
buffer. */
@ -772,16 +773,19 @@ init_dfa (dfa, pat_len)
"word". In this case "word" means that it is the word construction
character used by some operators like "\<", "\>", etc. */
static void
static reg_errcode_t
init_word_char (dfa)
re_dfa_t *dfa;
{
int i, j, ch;
dfa->word_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1);
if (dfa->word_char == NULL)
return REG_ESPACE;
for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
for (j = 0; j < UINT_BITS; ++j, ++ch)
if (isalnum (ch) || ch == '_')
dfa->word_char[i] |= 1 << j;
return REG_NOERROR;
}
/* Free the work area which are only used while compiling. */
@ -844,24 +848,28 @@ create_initial_state (dfa)
}
/* It must be the first time to invoke acquire_state. */
dfa->init_state = re_acquire_state_context (dfa, &init_nodes, 0);
dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
/* We don't check ERR here, since the initial state must not be NULL. */
if (dfa->init_state == NULL)
return err;
if (dfa->init_state->has_constraint)
{
dfa->init_state_word = re_acquire_state_context (dfa, &init_nodes,
dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
CONTEXT_WORD);
dfa->init_state_nl = re_acquire_state_context (dfa, &init_nodes,
dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
CONTEXT_NEWLINE);
dfa->init_state_begbuf = re_acquire_state_context (dfa, &init_nodes,
dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
&init_nodes,
CONTEXT_NEWLINE
| CONTEXT_BEGBUF);
if (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
|| dfa->init_state_begbuf == NULL)
return err;
}
else
dfa->init_state_word = dfa->init_state_nl
= dfa->init_state_begbuf = dfa->init_state;
if (dfa->init_state == NULL || dfa->init_state_word == NULL
|| dfa->init_state_nl == NULL || dfa->init_state_begbuf == NULL )
return REG_ESPACE;
re_node_set_free (&init_nodes);
return REG_NOERROR;
}
@ -1114,20 +1122,30 @@ calc_epsdest (dfa, node)
}
}
static int
duplicate_node (dfa, org_idx, constraint)
/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
The new index will be stored in NEW_IDX and return REG_NOERROR if succeeded,
otherwise return the error code. */
static reg_errcode_t
duplicate_node (new_idx, dfa, org_idx, constraint)
re_dfa_t *dfa;
int org_idx;
int *new_idx, org_idx;
unsigned int constraint;
{
re_token_t dup;
int dup_idx;
reg_errcode_t err;
dup.type = OP_CONTEXT_NODE;
if (dfa->nodes[org_idx].type == OP_CONTEXT_NODE)
{
/* If the node whose index is ORG_IDX is the same as the intended
node, use it. */
if (dfa->nodes[org_idx].constraint == constraint)
return org_idx;
{
*new_idx = org_idx;
return REG_NOERROR;
}
dup.constraint = constraint |
dfa->nodes[org_idx].constraint;
}
@ -1137,23 +1155,32 @@ duplicate_node (dfa, org_idx, constraint)
/* In case that `entity' points OP_CONTEXT_NODE,
we correct `entity' to real entity in calc_inveclosures(). */
dup.opr.ctx_info = malloc (sizeof (*dup.opr.ctx_info));
dup_idx = re_dfa_add_node (dfa, dup, 1);
if (dup.opr.ctx_info == NULL || dup_idx == -1)
return REG_ESPACE;
dup.opr.ctx_info->entity = org_idx;
dup.opr.ctx_info->bkref_eclosure = NULL;
dup_idx = re_dfa_add_node (dfa, dup, 1);
dfa->nodes[dup_idx].duplicated = 1;
dfa->nodes[dup_idx].duplicated = 1;
dfa->firsts[dup_idx] = dfa->firsts[org_idx];
dfa->nexts[dup_idx] = dfa->nexts[org_idx];
re_node_set_init_copy (dfa->edests + dup_idx, dfa->edests + org_idx);
err = re_node_set_init_copy (dfa->edests + dup_idx, dfa->edests + org_idx);
if (err != REG_NOERROR)
return err;
/* Since we don't duplicate epsilon nodes, epsilon closure have
only itself. */
re_node_set_init_1 (dfa->eclosures + dup_idx, dup_idx);
re_node_set_init_1 (dfa->inveclosures + dup_idx, dup_idx);
err = re_node_set_init_1 (dfa->eclosures + dup_idx, dup_idx);
if (err != REG_NOERROR)
return err;
err = re_node_set_init_1 (dfa->inveclosures + dup_idx, dup_idx);
if (err != REG_NOERROR)
return err;
/* Then we must update inveclosure for this node.
We process them at last part of calc_eclosure(),
since we don't complete to calculate them here. */
return dup_idx;
*new_idx = dup_idx;
return REG_NOERROR;
}
static void
@ -1193,6 +1220,7 @@ calc_eclosure (dfa)
/* For each nodes, calculate epsilon closure. */
for (node_idx = 0, max = dfa->nodes_len; ; ++node_idx)
{
reg_errcode_t err;
re_node_set eclosure_elem;
if (node_idx == max)
{
@ -1210,7 +1238,9 @@ calc_eclosure (dfa)
if (dfa->eclosures[node_idx].nelem != 0)
continue;
/* Calculate epsilon closure of `node_idx'. */
eclosure_elem = calc_eclosure_iter (dfa, node_idx, 1);
err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
if (err != REG_NOERROR)
return err;
if (dfa->eclosures[node_idx].nelem == 0)
{
@ -1241,7 +1271,13 @@ calc_eclosure (dfa)
{
int dest_node_idx = dfa->eclosures[dfa->nexts[idx]].elems[i];
if (!IS_EPSILON_NODE (dfa->nodes[dest_node_idx].type))
dest_node_idx = duplicate_node (dfa, dest_node_idx, constraint);
{
reg_errcode_t err;
err = duplicate_node (&dest_node_idx, dfa, dest_node_idx,
constraint);
if (err != REG_NOERROR)
return err;
}
re_node_set_insert (bkref_eclosure, dest_node_idx);
}
dfa->nodes[idx].opr.ctx_info->bkref_eclosure = bkref_eclosure;
@ -1252,15 +1288,19 @@ calc_eclosure (dfa)
/* Calculate epsilon closure of NODE. */
static re_node_set
calc_eclosure_iter (dfa, node, root)
static reg_errcode_t
calc_eclosure_iter (new_set, dfa, node, root)
re_node_set *new_set;
re_dfa_t *dfa;
int node, root;
{
reg_errcode_t err;
unsigned int constraint;
int i, max, incomplete = 0;
re_node_set eclosure;
re_node_set_alloc (&eclosure, 1);
err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
if (err != REG_NOERROR)
return err;
/* This indicates that we are calculating this node now.
We reference this value to avoid infinite loop. */
@ -1285,7 +1325,11 @@ calc_eclosure_iter (dfa, node, root)
/* If we haven't calculated the epsilon closure of `edest' yet,
calculate now. Otherwise use calculated epsilon closure. */
if (dfa->eclosures[edest].nelem == 0)
eclosure_elem = calc_eclosure_iter (dfa, edest, 0);
{
err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
if (err != REG_NOERROR)
return err;
}
else
eclosure_elem = dfa->eclosures[edest];
/* Merge the epsilon closure of `edest'. */
@ -1307,7 +1351,11 @@ calc_eclosure_iter (dfa, node, root)
int dest = eclosure.elems[i];
if (!IS_EPSILON_NODE (dfa->nodes[dest].type))
{
int dup_dest = duplicate_node (dfa, dest, constraint);
int dup_dest;
reg_errcode_t err;
err = duplicate_node (&dup_dest, dfa, dest, constraint);
if (err != REG_NOERROR)
return err;
if (dest != dup_dest)
{
re_node_set_remove_at (&eclosure, i--);
@ -1323,7 +1371,8 @@ calc_eclosure_iter (dfa, node, root)
dfa->eclosures[node].nelem = 0;
else
dfa->eclosures[node] = eclosure;
return eclosure;
*new_set = eclosure;
return REG_NOERROR;
}
/* Functions for token which are used in the parser. */
@ -1865,7 +1914,11 @@ parse_expression (regexp, preg, token, syntax, nest, err)
break;
case ANCHOR:
if (dfa->word_char == NULL)
init_word_char (dfa);
{
*err = init_word_char (dfa);
if (*err != REG_NOERROR)
return NULL;
}
if (token->opr.ctx_type == WORD_DELIM)
{
bin_tree_t *tree_first, *tree_last;
@ -2137,28 +2190,6 @@ parse_dup_op (dup_elem, regexp, dfa, token, syntax, err)
I'm not sure, but maybe enough. */
#define BRACKET_NAME_BUF_SIZE 32
static inline void *
extend_array_for_cset (array, num, alloc, type_size)
void *array;
int num, *alloc, type_size;
{
void *new_array = array;
if (*alloc == num)
{
if (*alloc == 0)
{
new_array = malloc (type_size);
*alloc = 1;
}
else
{
new_array = realloc (array, type_size * num * 2);
*alloc = 2 * num;
}
}
return new_array;
}
/* This function parse bracket expression like "[abc]", "[a-c]",
"[[.a-a.]]" etc. */
@ -2299,22 +2330,15 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
uint32_t *new_array_end;
int new_nranges;
/* XXX If mbcset->range_starts and mbcset->range_ends are NULL
if *range_alloc == 0 then we do not need the if. */
if (*range_alloc == 0)
{
new_nranges = 1;
new_array_start = re_malloc (uint32_t, 1);
new_array_end = re_malloc (uint32_t, 1);
}
else
{
new_nranges = 2 * mbcset->nranges;
new_array_start = re_realloc (mbcset->range_starts, uint32_t,
new_nranges);
new_array_end = re_realloc (mbcset->range_ends, uint32_t,
new_nranges);
}
/* +1 in case of mbcset->nranges is 0. */
new_nranges = 2 * mbcset->nranges + 1;
/* Use realloc since mbcset->range_starts and mbcset->range_ends
are NULL if *range_alloc == 0. */
new_array_start = re_realloc (mbcset->range_starts, uint32_t,
new_nranges);
new_array_end = re_realloc (mbcset->range_ends, uint32_t,
new_nranges);
if (new_array_start == NULL || new_array_end == NULL)
return REG_ESPACE;
@ -2394,13 +2418,18 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
/* Got valid collation sequence, add it as a new entry. */
/* Check the space of the arrays. */
mbcset->coll_syms = extend_array_for_cset (mbcset->coll_syms,
mbcset->ncoll_syms,
coll_sym_alloc,
sizeof (int32_t));
if (mbcset->coll_syms == NULL)
return REG_ESPACE;
if (*coll_sym_alloc == mbcset->ncoll_syms)
{
/* Not enough, realloc it. */
/* +1 in case of mbcset->ncoll_syms is 0. */
*coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
/* Use realloc since mbcset->coll_syms is NULL
if *alloc == 0. */
mbcset->coll_syms = re_realloc (mbcset->coll_syms, int32_t,
*coll_sym_alloc);
if (mbcset->coll_syms == NULL)
return REG_ESPACE;
}
mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
return REG_NOERROR;
}
@ -2557,12 +2586,18 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
bitset_set (sbcset, start_elem.opr.ch);
break;
case MB_CHAR:
mbcset->mbchars = extend_array_for_cset (mbcset->mbchars,
mbcset->nmbchars,
&mbchar_alloc,
sizeof (wchar_t));
if (mbcset->mbchars == NULL)
goto parse_bracket_exp_espace;
/* Check whether the array has enough space. */
if (mbchar_alloc == mbcset->nmbchars)
{
/* Not enough, realloc it. */
/* +1 in case of mbcset->nmbchars is 0. */
mbchar_alloc = 2 * mbcset->nmbchars + 1;
/* Use realloc since array is NULL if *alloc == 0. */
mbcset->mbchars = re_realloc (mbcset->mbchars, wchar_t,
mbchar_alloc);
if (mbcset->mbchars == NULL)
goto parse_bracket_exp_espace;
}
mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
break;
case EQUIV_CLASS:
@ -2779,14 +2814,18 @@ build_equiv_class (mbcset, sbcset, equiv_class_alloc, name)
bitset_set (sbcset, ch);
}
}
/* Check the space of the arrays, and extend if we need. */
mbcset->equiv_classes = extend_array_for_cset (mbcset->equiv_classes,
mbcset->nequiv_classes,
equiv_class_alloc,
sizeof (int32_t));
if (mbcset->equiv_classes == NULL)
return REG_ESPACE;
/* Check whether the array has enough space. */
if (*equiv_class_alloc == mbcset->nequiv_classes)
{
/* Not enough, realloc it. */
/* +1 in case of mbcset->nequiv_classes is 0. */
*equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
/* Use realloc since the array is NULL if *alloc == 0. */
mbcset->equiv_classes = re_realloc (mbcset->equiv_classes, int32_t,
*equiv_class_alloc);
if (mbcset->equiv_classes == NULL)
return REG_ESPACE;
}
mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
}
else
@ -2815,12 +2854,17 @@ build_charclass (mbcset, sbcset, char_class_alloc, name)
int i;
/* Check the space of the arrays. */
mbcset->char_classes = extend_array_for_cset (mbcset->char_classes,
mbcset->nchar_classes,
char_class_alloc,
sizeof (wctype_t));
if (mbcset->char_classes == NULL)
return REG_ESPACE;
if (*char_class_alloc == mbcset->nchar_classes)
{
/* Not enough, realloc it. */
/* +1 in case of mbcset->nchar_classes is 0. */
*char_class_alloc = 2 * mbcset->nchar_classes + 1;
/* Use realloc since array is NULL if *alloc == 0. */
mbcset->char_classes = re_realloc (mbcset->char_classes, wctype_t,
*char_class_alloc);
if (mbcset->char_classes == NULL)
return REG_ESPACE;
}
mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);

View File

@ -68,6 +68,8 @@ static reg_errcode_t re_string_translate_buffer (re_string_t *pstr,
static re_dfastate_t *create_newstate_common (re_dfa_t *dfa,
const re_node_set *nodes,
unsigned int hash);
static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate,
unsigned int hash);
static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa,
const re_node_set *nodes,
unsigned int hash);
@ -473,6 +475,10 @@ re_node_set_init_copy (dest, src)
return REG_NOERROR;
}
/* Calculate the intersection of the sets SRC1 and SRC2. And store it in
DEST. Return value indicate the error code or REG_NOERROR if succeeded.
Note: We assume dest->elems is NULL, when dest->alloc is 0. */
static reg_errcode_t
re_node_set_intersect (dest, src1, src2)
re_node_set *dest;
@ -483,31 +489,28 @@ re_node_set_intersect (dest, src1, src2)
{
if (src1->nelem + src2->nelem > dest->alloc)
{
int *new_array;
if (dest->alloc == 0)
new_array = re_malloc (int, src1->nelem + src2->nelem);
else
new_array = re_realloc (dest->elems, int,
src1->nelem + src2->nelem);
dest->alloc = src1->nelem + src2->nelem;
if (new_array == NULL)
dest->elems = re_realloc (dest->elems, int, dest->alloc);
if (dest->elems == NULL)
return REG_ESPACE;
dest->elems = new_array;
}
}
else
{
/* The intersection of empty sets is also empty set. */
dest->nelem = 0;
return REG_NOERROR;
}
for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
for (i1 = i2 = id = 0; i1 < src1->nelem && i2 < src2->nelem; )
{
if (src1->elems[i1] > src2->elems[i2])
{
++i2;
continue;
}
/* The intersection must have the elements which are in both of
SRC1 and SRC2. */
if (src1->elems[i1] == src2->elems[i2])
dest->elems[id++] = src2->elems[i2++];
++i1;
@ -516,6 +519,10 @@ re_node_set_intersect (dest, src1, src2)
return REG_NOERROR;
}
/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
DEST. Return value indicate the error code or REG_NOERROR if succeeded.
Note: We assume dest->elems is NULL, when dest->alloc is 0. */
static reg_errcode_t
re_node_set_add_intersect (dest, src1, src2)
re_node_set *dest;
@ -526,16 +533,10 @@ re_node_set_add_intersect (dest, src1, src2)
{
if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
{
int *new_array;
if (dest->alloc == 0)
new_array = re_malloc (int, src1->nelem + src2->nelem);
else
new_array = re_realloc (dest->elems, int,
src1->nelem + src2->nelem + dest->nelem);
dest->alloc = src1->nelem + src2->nelem + dest->nelem;
if (new_array == NULL)
dest->elems = re_realloc (dest->elems, int, dest->alloc);
if (dest->elems == NULL)
return REG_ESPACE;
dest->elems = new_array;
}
}
else
@ -567,6 +568,9 @@ re_node_set_add_intersect (dest, src1, src2)
return REG_NOERROR;
}
/* Calculate the union set of the sets SRC1 and SRC2. And store it to
DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
static reg_errcode_t
re_node_set_init_union (dest, src1, src2)
re_node_set *dest;
@ -617,6 +621,9 @@ re_node_set_init_union (dest, src1, src2)
return REG_NOERROR;
}
/* Calculate the union set of the sets DEST and SRC. And store it to
DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
static reg_errcode_t
re_node_set_merge (dest, src)
re_node_set *dest;
@ -628,12 +635,16 @@ re_node_set_merge (dest, src)
else if (dest == NULL)
{
dest = re_malloc (re_node_set, 1);
if (dest == NULL)
return REG_ESPACE;
return re_node_set_init_copy (dest, src);
}
if (dest->alloc < src->nelem + dest->nelem)
{
dest->alloc = 2 * (src->nelem + dest->alloc);
dest->elems = re_realloc (dest->elems, int, dest->alloc);
if (dest->elems == NULL)
return REG_ESPACE;
}
for (si = 0, di = 0 ; si < src->nelem && di < dest->nelem ;)
@ -860,18 +871,28 @@ calc_state_hash (nodes, context)
/* Search for the state whose node_set is equivalent to NODES.
Return the pointer to the state, if we found it in the DFA.
Otherwise create the new one and return it. */
Otherwise create the new one and return it. In case of an error
return NULL and set the error code in ERR.
Note: - We assume NULL as the invalid state, then it is possible that
return value is NULL and ERR is REG_NOERROR.
- We never return non-NULL value in case of any errors, it is for
optimization. */
static re_dfastate_t *
re_acquire_state (dfa, nodes)
static re_dfastate_t*
re_acquire_state (err, dfa, nodes)
reg_errcode_t *err;
re_dfa_t *dfa;
const re_node_set *nodes;
{
unsigned int hash;
re_dfastate_t *new_state;
struct re_state_table_entry *spot;
int i;
if (nodes->nelem == 0)
return NULL;
{
*err = REG_NOERROR;
return NULL;
}
hash = calc_state_hash (nodes, 0);
spot = dfa->state_table + (hash & dfa->state_hash_mask);
@ -893,25 +914,42 @@ re_acquire_state (dfa, nodes)
}
/* There are no appropriate state in the dfa, create the new one. */
return create_ci_newstate (dfa, nodes, hash);
new_state = create_ci_newstate (dfa, nodes, hash);
if (new_state != NULL)
return new_state;
else
{
*err = REG_ESPACE;
return NULL;
}
}
/* Search for the state whose node_set is equivalent to NODES and
whose context is equivalent to CONTEXT.
Return the pointer to the state, if we found it in the DFA.
Otherwise create the new one and return it. */
Otherwise create the new one and return it. In case of an error
return NULL and set the error code in ERR.
Note: - We assume NULL as the invalid state, then it is possible that
return value is NULL and ERR is REG_NOERROR.
- We never return non-NULL value in case of any errors, it is for
optimization. */
static re_dfastate_t *
re_acquire_state_context (dfa, nodes, context)
static re_dfastate_t*
re_acquire_state_context (err, dfa, nodes, context)
reg_errcode_t *err;
re_dfa_t *dfa;
const re_node_set *nodes;
unsigned int context;
{
unsigned int hash;
re_dfastate_t *new_state;
struct re_state_table_entry *spot;
int i;
if (nodes->nelem == 0)
return NULL;
{
*err = REG_NOERROR;
return NULL;
}
hash = calc_state_hash (nodes, context);
spot = dfa->state_table + (hash & dfa->state_hash_mask);
@ -934,9 +972,19 @@ re_acquire_state_context (dfa, nodes, context)
return state;
}
/* There are no appropriate state in `dfa', create the new one. */
return create_cd_newstate (dfa, nodes, context, hash);
new_state = create_cd_newstate (dfa, nodes, context, hash);
if (new_state != NULL)
return new_state;
else
{
*err = REG_ESPACE;
return NULL;
}
}
/* Allocate memory for DFA state and initialize common properties.
Return the new state if succeeded, otherwise return NULL. */
static re_dfastate_t *
create_newstate_common (dfa, nodes, hash)
re_dfa_t *dfa;
@ -945,6 +993,8 @@ create_newstate_common (dfa, nodes, hash)
{
re_dfastate_t *newstate;
newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
if (newstate == NULL)
return NULL;
re_node_set_init_copy (&newstate->nodes, nodes);
newstate->trtable = NULL;
newstate->trtable_search = NULL;
@ -952,7 +1002,10 @@ create_newstate_common (dfa, nodes, hash)
return newstate;
}
static void
/* Store the new state NEWSTATE whose hash value is HASH in appropriate
position. Return value indicate the error code if failed. */
static reg_errcode_t
register_state (dfa, newstate, hash)
re_dfa_t *dfa;
re_dfastate_t *newstate;
@ -972,8 +1025,7 @@ register_state (dfa, newstate, hash)
spot->alloc = 4;
new_array = re_malloc (re_dfastate_t *, spot->alloc);
if (new_array == NULL)
/* XXX return value */
return;
return REG_ESPACE;
new_array[0] = spot->entry.state;
}
else
@ -985,8 +1037,12 @@ register_state (dfa, newstate, hash)
spot->entry.array = new_array;
}
spot->entry.array[spot->num++] = newstate;
return REG_NOERROR;
}
/* Create the new state which is independ of contexts.
Return the new state if succeeded, otherwise return NULL. */
static re_dfastate_t *
create_ci_newstate (dfa, nodes, hash)
re_dfa_t *dfa;
@ -994,8 +1050,11 @@ create_ci_newstate (dfa, nodes, hash)
unsigned int hash;
{
int i;
reg_errcode_t err;
re_dfastate_t *newstate;
newstate = create_newstate_common (dfa, nodes, hash);
if (newstate == NULL)
return NULL;
newstate->entrance_nodes = &newstate->nodes;
for (i = 0 ; i < nodes->nelem ; i++)
@ -1021,11 +1080,13 @@ create_ci_newstate (dfa, nodes, hash)
newstate->halt = 1;
}
}
register_state (dfa, newstate, hash);
return newstate;
err = register_state (dfa, newstate, hash);
return (err != REG_NOERROR) ? NULL : newstate;
}
/* Create the new state which is depend on the context CONTEXT.
Return the new state if succeeded, otherwise return NULL. */
static re_dfastate_t *
create_cd_newstate (dfa, nodes, context, hash)
re_dfa_t *dfa;
@ -1033,9 +1094,12 @@ create_cd_newstate (dfa, nodes, context, hash)
unsigned int context, hash;
{
int i, nctx_nodes = 0;
reg_errcode_t err;
re_dfastate_t *newstate;
newstate = create_newstate_common (dfa, nodes, hash);
if (newstate == NULL)
return NULL;
newstate->context = context;
newstate->entrance_nodes = &newstate->nodes;
@ -1076,7 +1140,6 @@ create_cd_newstate (dfa, nodes, context, hash)
{
newstate->entrance_nodes = re_malloc (re_node_set, 1);
if (newstate->entrance_nodes == NULL)
/* XXX Return which value? */
return NULL;
re_node_set_init_copy (newstate->entrance_nodes, nodes);
nctx_nodes = 0;
@ -1090,6 +1153,6 @@ create_cd_newstate (dfa, nodes, context, hash)
}
}
}
register_state (dfa, newstate, hash);
return newstate;
err = register_state (dfa, newstate, hash);
return (err != REG_NOERROR) ? NULL : newstate;
}

View File

@ -426,9 +426,10 @@ static void re_node_set_remove_at (re_node_set *set, int idx);
#define re_node_set_empty(p) ((p)->nelem = 0)
#define re_node_set_free(set) re_free ((set)->elems)
static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token, int mode);
static re_dfastate_t *re_acquire_state (re_dfa_t *dfa,
static re_dfastate_t *re_acquire_state (reg_errcode_t *err, re_dfa_t *dfa,
const re_node_set *nodes);
static re_dfastate_t *re_acquire_state_context (re_dfa_t *dfa,
static re_dfastate_t *re_acquire_state_context (reg_errcode_t *err,
re_dfa_t *dfa,
const re_node_set *nodes,
unsigned int context);

View File

@ -38,15 +38,19 @@
#include "regex.h"
#include "regex_internal.h"
static void match_ctx_init (re_match_context_t *cache, int eflags, int n);
static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
int n);
static void match_ctx_free (re_match_context_t *cache);
static void match_ctx_add_entry (re_match_context_t *cache, int node, int from,
int to);
static int re_search_internal (const regex_t *preg, const char *string,
int length, int start, int range, size_t nmatch,
regmatch_t pmatch[], int eflags);
static inline re_dfastate_t *acquire_init_state_context (const regex_t *preg,
const re_string_t *input, int idx, int eflags);
static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
int from, int to);
static reg_errcode_t re_search_internal (const regex_t *preg,
const char *string, int length,
int start, int range, size_t nmatch,
regmatch_t pmatch[], int eflags);
static inline re_dfastate_t *acquire_init_state_context (reg_errcode_t *err,
const regex_t *preg,
const re_string_t *input,
int idx, int eflags);
static int check_matching (const regex_t *preg, re_string_t *input,
re_match_context_t *mctx, re_dfastate_t **state_log,
int start_idx, int fl_search, int fl_longest_match);
@ -61,9 +65,10 @@ static int proceed_next_node (const regex_t *preg,
const re_match_context_t *mctx,
const re_string_t *input,
int *pidx, int node, re_node_set *eps_via_nodes);
static void set_regs (const regex_t *preg, re_dfastate_t **state_log,
const re_match_context_t *mctx, const re_string_t *input,
size_t nmatch, regmatch_t *pmatch, int last);
static reg_errcode_t set_regs (const regex_t *preg, re_dfastate_t **state_log,
const re_match_context_t *mctx,
const re_string_t *input, size_t nmatch,
regmatch_t *pmatch, int last);
static int sift_states_iter_mb (const regex_t *preg, re_dfastate_t **state_log,
const re_match_context_t *mctx,
const re_string_t *input, int node_idx,
@ -73,36 +78,40 @@ static int sift_states_iter_bkref (const re_dfa_t *dfa,
struct re_backref_cache_entry *mctx_entry,
int node_idx, int idx, int match_first,
int match_last);
static void sift_states_backward (const regex_t *preg,
re_dfastate_t **state_log,
const re_match_context_t *mctx,
const re_string_t *input, int last_node);
static void add_epsilon_backreference (const re_dfa_t *dfa,
const re_match_context_t *mctx,
const re_node_set *plog, int idx,
re_node_set *state_buf);
static re_dfastate_t *transit_state (const regex_t *preg, re_dfastate_t *state,
re_string_t *input, int fl_search,
re_dfastate_t **state_log,
static reg_errcode_t sift_states_backward (const regex_t *preg,
re_dfastate_t **state_log,
const re_match_context_t *mctx,
const re_string_t *input,
int last_node);
static reg_errcode_t add_epsilon_backreference (const re_dfa_t *dfa,
const re_match_context_t *mctx,
const re_node_set *plog,
int idx,
re_node_set *state_buf);
static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg,
re_dfastate_t *state, re_string_t *input,
int fl_search, re_dfastate_t **state_log,
re_match_context_t *mctx);
static re_dfastate_t *transit_state_sb (const regex_t *preg,
static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg,
re_dfastate_t *pstate,
re_string_t *input, int fl_search,
re_match_context_t *mctx);
static void transit_state_mb (const regex_t *preg, re_dfastate_t *pstate,
const re_string_t *input,
re_dfastate_t **state_log,
re_match_context_t *mctx);
static void transit_state_bkref (const regex_t *preg, re_dfastate_t *pstate,
const re_string_t *input,
re_dfastate_t **state_log,
re_match_context_t *mctx);
static void transit_state_bkref_loop (const regex_t *preg,
const re_string_t *input,
re_node_set *nodes,
re_dfastate_t **work_state_log,
re_dfastate_t **state_log,
re_match_context_t *mctx);
static reg_errcode_t transit_state_mb (const regex_t *preg,
re_dfastate_t *pstate,
const re_string_t *input,
re_dfastate_t **state_log,
re_match_context_t *mctx);
static reg_errcode_t transit_state_bkref (const regex_t *preg,
re_dfastate_t *pstate,
const re_string_t *input,
re_dfastate_t **state_log,
re_match_context_t *mctx);
static reg_errcode_t transit_state_bkref_loop (const regex_t *preg,
const re_string_t *input,
re_node_set *nodes,
re_dfastate_t **work_state_log,
re_dfastate_t **state_log,
re_match_context_t *mctx);
static re_dfastate_t **build_trtable (const regex_t *dfa,
const re_dfastate_t *state,
int fl_search);
@ -141,13 +150,15 @@ regexec (preg, string, nmatch, pmatch, eflags)
regmatch_t pmatch[];
int eflags;
{
reg_errcode_t err;
int length = strlen (string);
if (preg->no_sub)
return re_search_internal (preg, string, length, 0, length, 0,
NULL, eflags);
err = re_search_internal (preg, string, length, 0, length, 0,
NULL, eflags);
else
return re_search_internal (preg, string, length, 0, length, nmatch,
pmatch, eflags);
err = re_search_internal (preg, string, length, 0, length, nmatch,
pmatch, eflags);
return err != REG_NOERROR;
}
#ifdef _LIBC
weak_alias (__regexec, regexec)
@ -164,7 +175,8 @@ re_match (buffer, string, length, start, regs)
int length, start;
struct re_registers *regs;
{
int i, nregs, result, rval, eflags = 0;
reg_errcode_t result;
int i, nregs, rval, eflags = 0;
regmatch_t *pmatch;
eflags |= (buffer->not_bol) ? REG_NOTBOL : 0;
@ -238,7 +250,7 @@ re_match (buffer, string, length, start, regs)
}
}
/* Return value is -1 if not match, the length of mathing otherwise. */
rval = (result) ? -1 : pmatch[0].rm_eo - pmatch[0].rm_so;
rval = (result != REG_NOERROR) ? -1 : pmatch[0].rm_eo - pmatch[0].rm_so;
re_free (pmatch);
return rval;
}
@ -290,7 +302,8 @@ re_search (bufp, string, size, startpos, range, regs)
int size, startpos, range;
struct re_registers *regs;
{
int i, nregs, result, real_range, rval, eflags = 0;
reg_errcode_t result;
int i, nregs, real_range, rval, eflags = 0;
regmatch_t *pmatch;
eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
@ -376,7 +389,7 @@ re_search (bufp, string, size, startpos, range, regs)
}
/* Return value is -1 if not match, the position where the mathing starts
otherwise. */
rval = (result) ? -1 : pmatch[0].rm_so;
rval = (result != REG_NOERROR) ? -1 : pmatch[0].rm_so;
re_free (pmatch);
return rval;
}
@ -486,11 +499,12 @@ static re_node_set empty_set;
length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
mingings with regexec. START, and RANGE have the same meanings
with re_search.
Return 0 if we find a match and REG_NOMATCH if not.
Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
otherwise return the error code.
Note: We assume front end functions already check ranges.
(START + RANGE >= 0 && START + RANGE <= LENGTH) */
static int
static reg_errcode_t
re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
const regex_t *preg;
const char *string;
@ -498,6 +512,7 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
size_t nmatch;
regmatch_t pmatch[];
{
reg_errcode_t err;
re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
re_string_t input;
re_dfastate_t **state_log;
@ -510,7 +525,7 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
if (preg->used == 0 || dfa->init_state == NULL
|| dfa->init_state_word == NULL || dfa->init_state_nl == NULL
|| dfa->init_state_begbuf == NULL)
return 1;
return REG_NOMATCH;
re_node_set_init_empty (&empty_set);
@ -522,16 +537,24 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
back-reference or a node which can accept multibyte character or
multi character collating element. */
if (nmatch > 1 || dfa->has_mb_node)
state_log = re_malloc (re_dfastate_t *, length + 1);
{
state_log = re_malloc (re_dfastate_t *, length + 1);
if (state_log == NULL)
return REG_ESPACE;
}
else
state_log = NULL;
if (preg->syntax & RE_ICASE)
re_string_construct_toupper (&input, string, length, preg->translate);
err = re_string_construct_toupper (&input, string, length, preg->translate);
else
re_string_construct (&input, string, length, preg->translate);
err = re_string_construct (&input, string, length, preg->translate);
if (err != REG_NOERROR)
return err;
match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
if (err != REG_NOERROR)
return err;
#ifdef DEBUG
/* We assume front-end functions already check them. */
@ -557,7 +580,12 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
match_last = check_matching (preg, &input, &mctx, state_log,
match_first, 0, fl_longest_match);
if (match_last != -1)
break;
{
if (match_last == -2)
return REG_ESPACE;
else
break; /* We found a matching. */
}
}
}
/* Update counter. */
@ -598,8 +626,13 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
#endif
halt_node = check_halt_state_context (preg, pstate, &input,
match_last, eflags);
sift_states_backward (preg, state_log, &mctx, &input, halt_node);
set_regs (preg, state_log, &mctx, &input, nmatch, pmatch, halt_node);
err = sift_states_backward (preg, state_log, &mctx, &input, halt_node);
if (err != REG_NOERROR)
return err;
err = set_regs (preg, state_log, &mctx, &input, nmatch, pmatch,
halt_node);
if (err != REG_NOERROR)
return err;
}
}
@ -607,21 +640,23 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
if (dfa->nbackref)
match_ctx_free (&mctx);
re_string_destruct (&input);
return match_last == -1;
return (match_last == -1) ? REG_NOMATCH : REG_NOERROR;
}
/* Acquire an initial state.
/* Acquire an initial state and return it.
We must select appropriate initial state depending on the context,
since initial states may have constraints like "\<", "^", etc.. */
static inline re_dfastate_t *
acquire_init_state_context (preg, input, idx, eflags)
const regex_t *preg;
const re_string_t *input;
int idx, eflags;
acquire_init_state_context (err, preg, input, idx, eflags)
reg_errcode_t *err;
const regex_t *preg;
const re_string_t *input;
int idx, eflags;
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
*err = REG_NOERROR;
if (dfa->init_state->has_constraint)
{
unsigned int context;
@ -636,9 +671,12 @@ acquire_init_state_context (preg, input, idx, eflags)
else if (IS_NEWLINE_CONTEXT (context))
return dfa->init_state_nl;
else if (IS_BEGBUF_CONTEXT (context))
/* It is relatively rare case, then calculate on demand. */
return re_acquire_state_context (dfa, dfa->init_state->entrance_nodes,
context);
{
/* It is relatively rare case, then calculate on demand. */
return re_acquire_state_context (err, dfa,
dfa->init_state->entrance_nodes,
context);
}
else
/* Must not happen? */
return dfa->init_state;
@ -648,7 +686,8 @@ acquire_init_state_context (preg, input, idx, eflags)
}
/* Check whether the regular expression match input string INPUT or not,
and return the index where the matching end, or return -1 if not match.
and return the index where the matching end, return -1 if not match,
or return -2 in case of an error.
FL_SEARCH means we must search where the matching starts,
FL_LONGEST_MATCH means we want the POSIX longest matching. */
@ -661,11 +700,15 @@ check_matching (preg, input, mctx, state_log, start_idx, fl_search,
re_dfastate_t **state_log;
int start_idx, fl_search, fl_longest_match;
{
reg_errcode_t err;
int match = 0, match_last = -1;
re_dfastate_t *cur_state;
cur_state = acquire_init_state_context (preg, input, start_idx,
cur_state = acquire_init_state_context (&err, preg, input, start_idx,
mctx->eflags);
/* An initial state must not be NULL(invalid state). */
if (cur_state == NULL)
return -2;
if (state_log != NULL)
state_log[start_idx] = cur_state;
/* If the RE accepts NULL string. */
@ -687,11 +730,13 @@ check_matching (preg, input, mctx, state_log, start_idx, fl_search,
while (!re_string_eoi (input))
{
cur_state = transit_state (preg, cur_state, input, fl_search && !match,
state_log, mctx);
if (cur_state == NULL) /* Reached at the invalid state. */
cur_state = transit_state (&err, preg, cur_state, input,
fl_search && !match, state_log, mctx);
if (cur_state == NULL) /* Reached at the invalid state or an error. */
{
int cur_str_idx = re_string_cur_idx (input);
if (err != REG_NOERROR)
return -2;
if (fl_search && !match)
{
/* Restart from initial state, since we are searching
@ -699,9 +744,11 @@ check_matching (preg, input, mctx, state_log, start_idx, fl_search,
#ifdef RE_ENABLE_I18N
if (MB_CUR_MAX == 1 || re_string_first_byte (input, cur_str_idx))
#endif /* RE_ENABLE_I18N */
cur_state = acquire_init_state_context (preg, input,
cur_state = acquire_init_state_context (&err, preg, input,
cur_str_idx,
mctx->eflags);
if (cur_state == NULL && err != REG_NOERROR)
return -2;
if (state_log != NULL)
state_log[cur_str_idx] = cur_state;
}
@ -787,9 +834,10 @@ check_halt_state_context (preg, state, input, idx, eflags)
return 0;
}
/* Compute the next node to which "NFA" transit from NODE.
Return the destination node, and update EPS_VIA_NODES.
("NFA" is a NFA corresponding to the DFA. */
/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
corresponding to the DFA).
Return the destination node, and update EPS_VIA_NODES, return -1 in case
of errors. */
static int
proceed_next_node (preg, state_log, mctx, input, pidx, node, eps_via_nodes)
@ -801,10 +849,12 @@ proceed_next_node (preg, state_log, mctx, input, pidx, node, eps_via_nodes)
re_node_set *eps_via_nodes;
{
re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
int i, dest_node = -1;
int i, dest_node = -1, err;
if (IS_EPSILON_NODE (dfa->nodes[node].type))
{
re_node_set_insert (eps_via_nodes, node);
err = re_node_set_insert (eps_via_nodes, node);
if (err < 0)
return -1;
for (i = 0; i < state_log[*pidx]->nodes.nelem; ++i)
{
int candidate = state_log[*pidx]->nodes.elems[i];
@ -845,7 +895,9 @@ proceed_next_node (preg, state_log, mctx, input, pidx, node, eps_via_nodes)
}
if (naccepted == 0)
{
re_node_set_insert (eps_via_nodes, node);
err = re_node_set_insert (eps_via_nodes, node);
if (err < 0)
return -1;
dest_node = dfa->nexts[node];
if (re_node_set_contains (&state_log[*pidx]->nodes, dest_node))
return dest_node;
@ -885,7 +937,7 @@ proceed_next_node (preg, state_log, mctx, input, pidx, node, eps_via_nodes)
Note: We assume that pmatch[0] is already set, and
pmatch[i].rm_so == pmatch[i].rm_eo == -1 (i > 1). */
static void
static reg_errcode_t
set_regs (preg, state_log, mctx, input, nmatch, pmatch, last_node)
const regex_t *preg;
re_dfastate_t **state_log;
@ -944,9 +996,11 @@ set_regs (preg, state_log, mctx, input, nmatch, pmatch, last_node)
/* Proceed to next node. */
cur_node = proceed_next_node (preg, state_log, mctx, input, &idx,
cur_node, &eps_via_nodes);
if (cur_node < 0)
return REG_ESPACE;
}
re_node_set_free (&eps_via_nodes);
return;
return REG_NOERROR;
}
#define NUMBER_OF_STATE 1
@ -974,7 +1028,7 @@ set_regs (preg, state_log, mctx, input, nmatch, pmatch, last_node)
#define STATE_NODE_CONTAINS(state,node) \
((state) != NULL && re_node_set_contains (&(state)->nodes, node))
static void
static reg_errcode_t
sift_states_backward (preg, state_log, mctx, input, last_node)
const regex_t *preg;
re_dfastate_t **state_log;
@ -982,6 +1036,7 @@ sift_states_backward (preg, state_log, mctx, input, last_node)
const re_string_t *input;
int last_node;
{
reg_errcode_t err;
re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
re_node_set state_buf;
int str_idx = mctx->match_last;
@ -990,18 +1045,28 @@ sift_states_backward (preg, state_log, mctx, input, last_node)
#ifdef DEBUG
assert (state_log != NULL && state_log[str_idx] != NULL);
#endif
re_node_set_alloc (&state_buf, NUMBER_OF_STATE);
err = re_node_set_alloc (&state_buf, NUMBER_OF_STATE);
if (err != REG_NOERROR)
return err;
plog = &state_log[str_idx]->nodes;
/* Build sifted state_log[str_idx]. It has the nodes which can epsilon
transit to the last_node and the last_node itself. */
re_node_set_intersect (&state_buf, plog, dfa->inveclosures + last_node);
err = re_node_set_intersect (&state_buf, plog, dfa->inveclosures + last_node);
if (err != REG_NOERROR)
return err;
if (state_log[str_idx] != NULL && state_log[str_idx]->has_backref)
add_epsilon_backreference (dfa, mctx, plog, str_idx, &state_buf);
{
err = add_epsilon_backreference (dfa, mctx, plog, str_idx, &state_buf);
if (err != REG_NOERROR)
return err;
}
/* Update state log. */
state_log[str_idx] = re_acquire_state (dfa, &state_buf);
state_log[str_idx] = re_acquire_state (&err, dfa, &state_buf);
if (state_log[str_idx] == NULL && err != REG_NOERROR)
return err;
/* Then check each states in the state_log. */
while (str_idx > mctx->match_first)
@ -1062,17 +1127,26 @@ sift_states_backward (preg, state_log, mctx, input, last_node)
/* `prev_node' may point the entity of the OP_CONTEXT_NODE,
then we use plog->elems[i] instead. */
re_node_set_add_intersect (&state_buf, plog,
dfa->inveclosures + prev_node);
err = re_node_set_add_intersect (&state_buf, plog,
dfa->inveclosures + prev_node);
if (err != REG_NOERROR)
return err;
}
if (state_log[str_idx] != NULL && state_log[str_idx]->has_backref)
add_epsilon_backreference (dfa, mctx, plog, str_idx, &state_buf);
{
err = add_epsilon_backreference (dfa, mctx, plog, str_idx, &state_buf);
if (err != REG_NOERROR)
return err;
}
/* Update state_log. */
state_log[str_idx] = re_acquire_state (dfa, &state_buf);
state_log[str_idx] = re_acquire_state (&err, dfa, &state_buf);
if (state_log[str_idx] == NULL && err != REG_NOERROR)
return err;
}
re_node_set_free (&state_buf);
return REG_NOERROR;
}
/* Helper functions. */
@ -1136,7 +1210,7 @@ sift_states_iter_bkref (dfa, state_log, mctx_entry, node_idx, idx, match_first,
return naccepted;
}
static void
static reg_errcode_t
add_epsilon_backreference (dfa, mctx, plog, idx, state_buf)
const re_dfa_t *dfa;
const re_match_context_t *mctx;
@ -1164,12 +1238,16 @@ add_epsilon_backreference (dfa, mctx, plog, idx, state_buf)
}
if (j < mctx->nbkref_ents || idx == mctx->match_first)
{
re_node_set_add_intersect (state_buf, plog,
dfa->inveclosures + node_idx);
reg_errcode_t err;
err = re_node_set_add_intersect (state_buf, plog,
dfa->inveclosures + node_idx);
if (err != REG_NOERROR)
return err;
i = 0;
}
}
}
return REG_NOERROR;
}
/* Functions for state transition. */
@ -1180,17 +1258,19 @@ add_epsilon_backreference (dfa, mctx, plog, idx, state_buf)
update the destination of STATE_LOG. */
static re_dfastate_t *
transit_state (preg, state, input, fl_search, state_log, mctx)
const regex_t *preg;
re_dfastate_t *state, **state_log;
re_string_t *input;
int fl_search;
re_match_context_t *mctx;
transit_state (err, preg, state, input, fl_search, state_log, mctx)
reg_errcode_t *err;
const regex_t *preg;
re_dfastate_t *state, **state_log;
re_string_t *input;
int fl_search;
re_match_context_t *mctx;
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
re_dfastate_t **trtable, *next_state;
unsigned char ch;
*err = REG_NOERROR;
if (state == NULL)
{
next_state = state;
@ -1200,7 +1280,11 @@ transit_state (preg, state, input, fl_search, state_log, mctx)
{
/* If the current state can accept multibyte. */
if (state->accept_mb)
transit_state_mb (preg, state, input, state_log, mctx);
{
*err = transit_state_mb (preg, state, input, state_log, mctx);
if (*err != REG_NOERROR)
return NULL;
}
/* Then decide the next state with the single byte. */
if (1)
@ -1221,7 +1305,10 @@ transit_state (preg, state, input, fl_search, state_log, mctx)
else
{
/* don't use transition table */
next_state = transit_state_sb (preg, state, input, fl_search, mctx);
next_state = transit_state_sb (err, preg, state, input, fl_search,
mctx);
if (next_state == NULL && err != REG_NOERROR)
return NULL;
}
}
@ -1252,7 +1339,10 @@ transit_state (preg, state, input, fl_search, state_log, mctx)
if (next_state != NULL)
{
table_nodes = next_state->entrance_nodes;
re_node_set_init_union (&next_nodes, table_nodes, log_nodes);
*err = re_node_set_init_union (&next_nodes, table_nodes,
log_nodes);
if (*err != REG_NOERROR)
return NULL;
}
else
next_nodes = *log_nodes;
@ -1262,14 +1352,19 @@ transit_state (preg, state, input, fl_search, state_log, mctx)
context = re_string_context_at (input, re_string_cur_idx (input) - 1,
mctx->eflags, preg->newline_anchor);
next_state = state_log[cur_idx]
= re_acquire_state_context (dfa, &next_nodes, context);
= re_acquire_state_context (err, dfa, &next_nodes, context);
/* We don't need to check errors here, since the return value of
this function is next_state and ERR is already set. */
if (table_nodes != NULL)
re_node_set_free (&next_nodes);
}
/* If the next state has back references. */
if (next_state != NULL && next_state->has_backref)
{
transit_state_bkref (preg, next_state, input, state_log, mctx);
*err = transit_state_bkref (preg, next_state, input, state_log, mctx);
if (*err != REG_NOERROR)
return NULL;
next_state = state_log[cur_idx];
}
}
@ -1282,12 +1377,13 @@ transit_state (preg, state, input, fl_search, state_log, mctx)
accepting the current input byte. */
static re_dfastate_t *
transit_state_sb (preg, state, input, fl_search, mctx)
const regex_t *preg;
re_dfastate_t *state;
re_string_t *input;
int fl_search;
re_match_context_t *mctx;
transit_state_sb (err, preg, state, input, fl_search, mctx)
reg_errcode_t *err;
const regex_t *preg;
re_dfastate_t *state;
re_string_t *input;
int fl_search;
re_match_context_t *mctx;
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
re_node_set next_nodes;
@ -1295,14 +1391,20 @@ transit_state_sb (preg, state, input, fl_search, mctx)
int node_cnt, cur_str_idx = re_string_cur_idx (input);
unsigned int context;
re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
*err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
if (*err != REG_NOERROR)
return NULL;
for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
{
int cur_node = state->nodes.elems[node_cnt];
if (check_node_accept (preg, dfa->nodes + cur_node, input,
cur_str_idx, mctx->eflags))
re_node_set_merge (&next_nodes,
dfa->eclosures + dfa->nexts[cur_node]);
{
*err = re_node_set_merge (&next_nodes,
dfa->eclosures + dfa->nexts[cur_node]);
if (*err != REG_NOERROR)
return NULL;
}
}
if (fl_search)
{
@ -1317,23 +1419,32 @@ transit_state_sb (preg, state, input, fl_search, mctx)
}
if (!not_initial)
#endif
re_node_set_merge (&next_nodes, dfa->init_state->entrance_nodes);
{
*err = re_node_set_merge (&next_nodes,
dfa->init_state->entrance_nodes);
if (*err != REG_NOERROR)
return NULL;
}
}
context = re_string_context_at (input, cur_str_idx, mctx->eflags,
preg->newline_anchor);
next_state = re_acquire_state_context (dfa, &next_nodes, context);
next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
/* We don't need to check errors here, since the return value of
this function is next_state and ERR is already set. */
re_node_set_free (&next_nodes);
re_string_skip_bytes (input, 1);
return next_state;
}
static void
static reg_errcode_t
transit_state_mb (preg, pstate, input, state_log, mctx)
const regex_t *preg;
re_dfastate_t *pstate, **state_log;
const re_string_t *input;
re_match_context_t *mctx;
{
reg_errcode_t err;
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
int i;
@ -1376,39 +1487,50 @@ transit_state_mb (preg, pstate, input, state_log, mctx)
if (dest_state == NULL)
dest_nodes = *new_nodes;
else
re_node_set_init_union (&dest_nodes, dest_state->entrance_nodes,
new_nodes);
{
err = re_node_set_init_union (&dest_nodes,
dest_state->entrance_nodes, new_nodes);
if (err != REG_NOERROR)
return err;
}
context = re_string_context_at (input, dest_idx - 1, mctx->eflags,
preg->newline_anchor);
state_log[dest_idx] = re_acquire_state_context (dfa, &dest_nodes, context);
state_log[dest_idx] = re_acquire_state_context (&err, dfa, &dest_nodes,
context);
if (state_log[dest_idx] == NULL && err != REG_NOERROR)
return err;
if (dest_state != NULL)
re_node_set_free (&dest_nodes);
}
return REG_NOERROR;
}
static void
static reg_errcode_t
transit_state_bkref (preg, pstate, input, state_log, mctx)
const regex_t *preg;
re_dfastate_t *pstate, **state_log;
const re_string_t *input;
re_match_context_t *mctx;
{
reg_errcode_t err;
re_dfastate_t **work_state_log;
#ifdef DEBUG
assert (mctx->match_first != -1);
#endif
work_state_log = re_malloc (re_dfastate_t *, re_string_cur_idx (input) + 1);
if (work_state_log == NULL)
return REG_ESPACE;
transit_state_bkref_loop (preg, input, &pstate->nodes, work_state_log,
state_log, mctx);
err = transit_state_bkref_loop (preg, input, &pstate->nodes, work_state_log,
state_log, mctx);
re_free (work_state_log);
return err;
}
/* Caller must allocate `work_state_log'. */
static void
static reg_errcode_t
transit_state_bkref_loop (preg, input, nodes, work_state_log, state_log, mctx)
const regex_t *preg;
const re_string_t *input;
@ -1416,10 +1538,13 @@ transit_state_bkref_loop (preg, input, nodes, work_state_log, state_log, mctx)
re_dfastate_t **work_state_log, **state_log;
re_match_context_t *mctx;
{
reg_errcode_t err;
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
int i, j;
regmatch_t *cur_regs = re_malloc (regmatch_t, preg->re_nsub + 1);
int cur_str_idx = re_string_cur_idx (input);
if (cur_regs == NULL)
return REG_ESPACE;
for (i = 0; i < nodes->nelem; ++i)
{
@ -1474,7 +1599,9 @@ transit_state_bkref_loop (preg, input, nodes, work_state_log, state_log, mctx)
/* Successfully matched, add a new cache entry. */
dest_str_idx = cur_str_idx + subexp_len;
match_ctx_add_entry (mctx, node_idx, cur_str_idx, dest_str_idx);
err = match_ctx_add_entry (mctx, node_idx, cur_str_idx, dest_str_idx);
if (err != REG_NOERROR)
return err;
clean_state_log_if_need (state_log, mctx, dest_str_idx);
/* And add the epsilon closures (which is `new_dest_nodes') of
@ -1494,29 +1621,44 @@ transit_state_bkref_loop (preg, input, nodes, work_state_log, state_log, mctx)
: state_log[cur_str_idx]->nodes.nelem);
/* Add `new_dest_node' to state_log. */
if (dest_state == NULL)
state_log[dest_str_idx] = re_acquire_state_context (dfa,
new_dest_nodes,
context);
{
state_log[dest_str_idx] = re_acquire_state_context (&err, dfa,
new_dest_nodes,
context);
if (state_log[dest_str_idx] == NULL && err != REG_NOERROR)
return err;
}
else
{
re_node_set dest_nodes;
re_node_set_init_union (&dest_nodes, dest_state->entrance_nodes,
new_dest_nodes);
state_log[dest_str_idx] = re_acquire_state_context (dfa, &dest_nodes,
err = re_node_set_init_union (&dest_nodes, dest_state->entrance_nodes,
new_dest_nodes);
if (err != REG_NOERROR)
return err;
state_log[dest_str_idx] = re_acquire_state_context (&err, dfa,
&dest_nodes,
context);
if (state_log[dest_str_idx] == NULL && err != REG_NOERROR)
return err;
re_node_set_free (&dest_nodes);
}
/* We need to check recursively if the backreference can epsilon
transit. */
if (subexp_len == 0 && state_log[cur_str_idx]->nodes.nelem > prev_nelem)
transit_state_bkref_loop (preg, input, new_dest_nodes, work_state_log,
state_log, mctx);
{
err = transit_state_bkref_loop (preg, input, new_dest_nodes,
work_state_log, state_log, mctx);
if (err != REG_NOERROR)
return err;
}
}
re_free (cur_regs);
return REG_NOERROR;
}
/* Build transition table for the state. */
/* Build transition table for the state.
Return the new table if succeeded, otherwise return NULL. */
static re_dfastate_t **
build_trtable (preg, state, fl_search)
@ -1524,6 +1666,7 @@ build_trtable (preg, state, fl_search)
const re_dfastate_t *state;
int fl_search;
{
reg_errcode_t err;
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
int i, j, k, ch;
int ndests; /* Number of the destination states from `state'. */
@ -1541,15 +1684,18 @@ build_trtable (preg, state, fl_search)
/* Initialize transiton table. */
trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
if (dests_node == NULL || dests_ch == NULL || trtable == NULL)
return NULL;
/* At first, group all nodes belonging to `state' into several
destinations. */
ndests = group_nodes_into_DFAstates (preg, state, dests_node, dests_ch);
if (ndests == 0)
if (ndests <= 0)
{
re_free (dests_node);
re_free (dests_ch);
return trtable;
/* Return NULL in case of an error, trtable otherwise. */
return (ndests < 0) ? NULL : trtable;
}
dest_states = re_malloc (re_dfastate_t *, ndests);
@ -1557,7 +1703,11 @@ build_trtable (preg, state, fl_search)
dest_states_nl = re_malloc (re_dfastate_t *, ndests);
bitset_empty (acceptable);
re_node_set_alloc (&follows, ndests + 1);
err = re_node_set_alloc (&follows, ndests + 1);
if (dest_states == NULL || dest_states_word == NULL || dest_states_nl == NULL
|| err != REG_NOERROR)
return NULL;
/* Then build the states for all destinations. */
for (i = 0; i < ndests; ++i)
{
@ -1569,7 +1719,9 @@ build_trtable (preg, state, fl_search)
next_node = dfa->nexts[dests_node[i].elems[j]];
if (next_node != -1)
{
re_node_set_merge (&follows, dfa->eclosures + next_node);
err = re_node_set_merge (&follows, dfa->eclosures + next_node);
if (err != REG_NOERROR)
return NULL;
}
}
/* If search flag is set, merge the initial state. */
@ -1585,17 +1737,28 @@ build_trtable (preg, state, fl_search)
}
if (!not_initial)
#endif
re_node_set_merge (&follows, dfa->init_state->entrance_nodes);
{
err = re_node_set_merge (&follows,
dfa->init_state->entrance_nodes);
if (err != REG_NOERROR)
return NULL;
}
}
dest_states[i] = re_acquire_state_context (dfa, &follows, 0);
dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
if (dest_states[i] == NULL && err != REG_NOERROR)
return NULL;
/* If the new state has context constraint,
build appropriate states for these contexts. */
if (dest_states[i]->has_constraint)
{
dest_states_word[i] = re_acquire_state_context (dfa, &follows,
dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
CONTEXT_WORD);
dest_states_nl[i] = re_acquire_state_context (dfa, &follows,
if (dest_states_word[i] == NULL && err != REG_NOERROR)
return NULL;
dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
CONTEXT_NEWLINE);
if (dest_states_nl[i] == NULL && err != REG_NOERROR)
return NULL;
}
else
{
@ -1654,6 +1817,7 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
re_node_set *dests_node;
bitset *dests_ch;
{
reg_errcode_t err;
const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
int i, j, k;
int ndests; /* Number of the destinations from `state'. */
@ -1750,12 +1914,16 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
{
bitset_copy (dests_ch[ndests], remains);
bitset_copy (dests_ch[j], intersec);
re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
if (err != REG_NOERROR)
return -1;
++ndests;
}
/* Put the position in the current group. */
re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
err = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
if (err < 0)
return -1;
/* If all characters are consumed, go to next node. */
if (!not_consumed)
@ -1765,7 +1933,9 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
if (j == ndests)
{
bitset_copy (dests_ch[ndests], accepts);
re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
if (err != REG_NOERROR)
return -1;
++ndests;
bitset_empty (accepts);
}
@ -2028,7 +2198,7 @@ check_node_accept (preg, node, input, idx, eflags)
/* Functions for matching context. */
static void
static reg_errcode_t
match_ctx_init (mctx, eflags, n)
re_match_context_t *mctx;
int eflags;
@ -2037,12 +2207,17 @@ match_ctx_init (mctx, eflags, n)
mctx->eflags = eflags;
mctx->match_first = mctx->match_last = -1;
if (n > 0)
mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
{
mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
if (mctx->bkref_ents == NULL)
return REG_ESPACE;
}
else
mctx->bkref_ents = NULL;
mctx->nbkref_ents = 0;
mctx->abkref_ents = n;
mctx->max_bkref_len = 0;
return REG_NOERROR;
}
static void
@ -2054,7 +2229,7 @@ match_ctx_free (mctx)
/* Add a new backreference entry to the cache. */
static void
static reg_errcode_t
match_ctx_add_entry (mctx, node, from, to)
re_match_context_t *mctx;
int node, from, to;
@ -2064,6 +2239,8 @@ match_ctx_add_entry (mctx, node, from, to)
mctx->bkref_ents = re_realloc (mctx->bkref_ents,
struct re_backref_cache_entry,
mctx->abkref_ents * 2);
if (mctx->bkref_ents == NULL)
return REG_ESPACE;
memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
mctx->abkref_ents *= 2;
@ -2073,4 +2250,5 @@ match_ctx_add_entry (mctx, node, from, to)
mctx->bkref_ents[mctx->nbkref_ents++].to = to;
if (mctx->max_bkref_len < to - from)
mctx->max_bkref_len = to - from;
return REG_NOERROR;
}