mirror of
git://sourceware.org/git/glibc.git
synced 2025-01-30 12:31:53 +08:00
Update.
2002-10-11 Isamu Hasegawa <isamu@yamato.ibm.com> * posix/regcomp.c (re_compile_fastmap_iter): Remove the handling OP_CONTEXT_NODE. (regfree): Likewise. (create_initial_state): Likewise. (analyze): Remove the substitutions which became useless. (calc_first): Likewise. (calc_epsdest): Use edests of OP_BACK_REF in case that it has epsilon destination. (duplicate_node_closure): New function. (duplicate_node): Remove the handling OP_CONTEXT_NODE. (calc_inveclosure): Likewise. (calc_eclosure): Likewise. (calc_eclosure_iter): Invoke duplicate_node_closure instead of direct invocation of duplicate_node. (parse): Don't use comma operator in the return to avoid compiler warning. (parse_reg_exp): Likewise. (parse_branch): Likewise. (parse_expression): Likewise. (parse_sub_exp): Likewise. (parse_dup_op): Likewise. * posix/regex_internal.c (re_dfa_add_node): Remove the substitutions which became useless. (create_ci_newstate): Remove the handling OP_CONTEXT_NODE. (create_cd_newstate): Likewise. * posix/regex_internal.h (re_token_type_t): Remove the obsolete type. (re_token_t): Likewise. (re_dfa_t): Likewise. (re_node_set_remove): New macro. * posix/regexec.c (check_matching): Remove the handling OP_CONTEXT_NODE. (check_halt_node_context): Likewise. (proceed_next_node): Likewise. (pop_fail_stack): Fix the memory leak. (set_regs): Likewise. (free_fail_stack_return): New function. (sift_states_backward): Fix the memory leak. Remove the handling OP_CONTEXT_NODE. (update_cur_sifted_state): Append some if clause to avoid redundant call. (sub_epsilon_src_nodes): Use IS_EPSILON_NODE since it might be a back reference. (check_dst_limits): Remove the handling OP_CONTEXT_NODE. (check_subexp_limits): Likewise. (search_subexp): Likewise. (sift_states_bkref): Likewise. (transit_state_mb): Likewise. (transit_state_bkref_loop): Likewise. (transit_state_bkref_loop): Likewise. (group_nodes_into_DFAstates): Likewise. (check_node_accept): Likewise. (sift_ctx_init): Add initializing. 
2002-10-12 Ulrich Drepper <drepper@redhat.com> * sysdeps/unix/sysv/linux/i386/sysdep.h (INLINE_SYSCALL): Use __builtin_expect.
This commit is contained in:
parent
cc12f2a442
commit
485d775dd5
60
ChangeLog
60
ChangeLog
@ -1,3 +1,63 @@
|
||||
2002-10-11 Isamu Hasegawa <isamu@yamato.ibm.com>
|
||||
|
||||
* posix/regcomp.c (re_compile_fastmap_iter): Remove the handling
|
||||
OP_CONTEXT_NODE.
|
||||
(regfree): Likewise.
|
||||
(create_initial_state): Likewise.
|
||||
(analyze): Remove the substitutions which became useless.
|
||||
(calc_first): Likewise.
|
||||
(calc_epsdest): Use edests of OP_BACK_REF in case that it has
|
||||
epsilon destination.
|
||||
(duplicate_node_closure): New function.
|
||||
(duplicate_node): Remove the handling OP_CONTEXT_NODE.
|
||||
(calc_inveclosure): Likewise.
|
||||
(calc_eclosure): Likewise.
|
||||
(calc_eclosure_iter): Invoke duplicate_node_closure instead of
|
||||
direct invocation of duplicate_node.
|
||||
(parse): Don't use comma operator in the return to avoid compiler
|
||||
warning.
|
||||
(parse_reg_exp): Likewise.
|
||||
(parse_branch): Likewise.
|
||||
(parse_expression): Likewise.
|
||||
(parse_sub_exp): Likewise.
|
||||
(parse_dup_op): Likewise.
|
||||
* posix/regex_internal.c (re_dfa_add_node): Remove the substitutions
|
||||
which became useless.
|
||||
(create_ci_newstate): Remove the handling OP_CONTEXT_NODE.
|
||||
(create_cd_newstate): Likewise.
|
||||
* posix/regex_internal.h (re_token_type_t): Remove the obsolete type.
|
||||
(re_token_t): Likewise.
|
||||
(re_dfa_t): Likewise.
|
||||
(re_node_set_remove): New macro.
|
||||
* posix/regexec.c (check_matching): Remove the handling
|
||||
OP_CONTEXT_NODE.
|
||||
(check_halt_node_context): Likewise.
|
||||
(proceed_next_node): Likewise.
|
||||
(pop_fail_stack): Fix the memory leak.
|
||||
(set_regs): Likewise.
|
||||
(free_fail_stack_return): New function.
|
||||
(sift_states_backward): Fix the memory leak. Remove the handling
|
||||
OP_CONTEXT_NODE.
|
||||
(update_cur_sifted_state): Append some if clause to avoid redundant
|
||||
call.
|
||||
(sub_epsilon_src_nodes): Use IS_EPSILON_NODE since it might be a
|
||||
back reference.
|
||||
(check_dst_limits): Remove the handling OP_CONTEXT_NODE.
|
||||
(check_subexp_limits): Likewise.
|
||||
(search_subexp): Likewise.
|
||||
(sift_states_bkref): Likewise.
|
||||
(transit_state_mb): Likewise.
|
||||
(transit_state_bkref_loop): Likewise.
|
||||
(transit_state_bkref_loop): Likewise.
|
||||
(group_nodes_into_DFAstates): Likewise.
|
||||
(check_node_accept): Likewise.
|
||||
(sift_ctx_init): Add initializing.
|
||||
|
||||
2002-10-12 Ulrich Drepper <drepper@redhat.com>
|
||||
|
||||
* sysdeps/unix/sysv/linux/i386/sysdep.h (INLINE_SYSCALL): Use
|
||||
__builtin_expect.
|
||||
|
||||
2002-10-11 Ulrich Drepper <drepper@redhat.com>
|
||||
|
||||
* elf/dl-load.c (_dl_map_object_from_fd): Remove unnecessarily
|
||||
|
365
posix/regcomp.c
365
posix/regcomp.c
@ -85,6 +85,9 @@ static reg_errcode_t analyze_tree (re_dfa_t *dfa, bin_tree_t *node);
|
||||
static void calc_first (re_dfa_t *dfa, bin_tree_t *node);
|
||||
static void calc_next (re_dfa_t *dfa, bin_tree_t *node);
|
||||
static void calc_epsdest (re_dfa_t *dfa, bin_tree_t *node);
|
||||
static reg_errcode_t duplicate_node_closure (re_dfa_t *dfa, int top_org_node,
|
||||
int top_clone_node, int root_node,
|
||||
unsigned int constraint);
|
||||
static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx,
|
||||
unsigned int constraint);
|
||||
static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
|
||||
@ -351,11 +354,6 @@ re_compile_fastmap_iter (bufp, init_state, fastmap)
|
||||
{
|
||||
int node = init_state->nodes.elems[node_cnt];
|
||||
re_token_type_t type = dfa->nodes[node].type;
|
||||
if (type == OP_CONTEXT_NODE)
|
||||
{
|
||||
node = dfa->nodes[node].opr.ctx_info->entity;
|
||||
type = dfa->nodes[node].type;
|
||||
}
|
||||
|
||||
if (type == CHARACTER)
|
||||
fastmap[dfa->nodes[node].opr.c] = 1;
|
||||
@ -587,18 +585,7 @@ regfree (preg)
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
|
||||
re_free (node->opr.sbcset);
|
||||
else if (node->type == OP_CONTEXT_NODE)
|
||||
{
|
||||
if (dfa->nodes[node->opr.ctx_info->entity].type == OP_BACK_REF)
|
||||
{
|
||||
if (node->opr.ctx_info->bkref_eclosure != NULL)
|
||||
re_node_set_free (node->opr.ctx_info->bkref_eclosure);
|
||||
re_free (node->opr.ctx_info->bkref_eclosure);
|
||||
}
|
||||
re_free (node->opr.ctx_info);
|
||||
}
|
||||
}
|
||||
re_free (dfa->firsts);
|
||||
re_free (dfa->nexts);
|
||||
for (i = 0; i < dfa->nodes_len; ++i)
|
||||
{
|
||||
@ -883,39 +870,25 @@ create_initial_state (dfa)
|
||||
re_token_type_t type = dfa->nodes[node_idx].type;
|
||||
|
||||
int clexp_idx;
|
||||
int entity = (type != OP_CONTEXT_NODE ? node_idx
|
||||
: dfa->nodes[node_idx].opr.ctx_info->entity);
|
||||
if ((type != OP_CONTEXT_NODE
|
||||
|| (dfa->nodes[entity].type != OP_BACK_REF))
|
||||
&& (type != OP_BACK_REF))
|
||||
if (type != OP_BACK_REF)
|
||||
continue;
|
||||
for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
|
||||
{
|
||||
re_token_t *clexp_node;
|
||||
clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
|
||||
if (clexp_node->type == OP_CLOSE_SUBEXP
|
||||
&& clexp_node->opr.idx + 1 == dfa->nodes[entity].opr.idx)
|
||||
&& clexp_node->opr.idx + 1 == dfa->nodes[node_idx].opr.idx)
|
||||
break;
|
||||
}
|
||||
if (clexp_idx == init_nodes.nelem)
|
||||
continue;
|
||||
|
||||
if (type == OP_CONTEXT_NODE
|
||||
&& (dfa->nodes[dfa->nodes[node_idx].opr.ctx_info->entity].type
|
||||
== OP_BACK_REF))
|
||||
if (type == OP_BACK_REF)
|
||||
{
|
||||
int prev_nelem = init_nodes.nelem;
|
||||
re_node_set_merge (&init_nodes,
|
||||
dfa->nodes[node_idx].opr.ctx_info->bkref_eclosure);
|
||||
if (prev_nelem < init_nodes.nelem)
|
||||
i = 0;
|
||||
}
|
||||
else if (type == OP_BACK_REF)
|
||||
{
|
||||
int next_idx = dfa->nexts[node_idx];
|
||||
if (!re_node_set_contains (&init_nodes, next_idx))
|
||||
int dest_idx = dfa->edests[node_idx].elems[0];
|
||||
if (!re_node_set_contains (&init_nodes, dest_idx))
|
||||
{
|
||||
re_node_set_merge (&init_nodes, dfa->eclosures + next_idx);
|
||||
re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
|
||||
i = 0;
|
||||
}
|
||||
}
|
||||
@ -959,18 +932,16 @@ analyze (dfa)
|
||||
reg_errcode_t ret;
|
||||
|
||||
/* Allocate arrays. */
|
||||
dfa->firsts = re_malloc (int, dfa->nodes_alloc);
|
||||
dfa->nexts = re_malloc (int, dfa->nodes_alloc);
|
||||
dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
|
||||
dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
|
||||
dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_alloc);
|
||||
if (BE (dfa->firsts == NULL || dfa->nexts == NULL || dfa->edests == NULL
|
||||
if (BE (dfa->nexts == NULL || dfa->edests == NULL
|
||||
|| dfa->eclosures == NULL || dfa->inveclosures == NULL, 0))
|
||||
return REG_ESPACE;
|
||||
/* Initialize them. */
|
||||
for (i = 0; i < dfa->nodes_len; ++i)
|
||||
{
|
||||
dfa->firsts[i] = -1;
|
||||
dfa->nexts[i] = -1;
|
||||
re_node_set_init_empty (dfa->edests + i);
|
||||
re_node_set_init_empty (dfa->eclosures + i);
|
||||
@ -1083,8 +1054,6 @@ calc_first (dfa, node)
|
||||
node->first = node->left->first;
|
||||
break;
|
||||
}
|
||||
if (node->type == 0)
|
||||
dfa->firsts[idx] = node->first;
|
||||
}
|
||||
|
||||
/* Calculate "next" for the node NODE. */
|
||||
@ -1187,11 +1156,114 @@ calc_epsdest (dfa, node)
|
||||
}
|
||||
else if (dfa->nodes[idx].type == ANCHOR
|
||||
|| dfa->nodes[idx].type == OP_OPEN_SUBEXP
|
||||
|| dfa->nodes[idx].type == OP_CLOSE_SUBEXP)
|
||||
|| dfa->nodes[idx].type == OP_CLOSE_SUBEXP
|
||||
|| dfa->nodes[idx].type == OP_BACK_REF)
|
||||
re_node_set_init_1 (dfa->edests + idx, node->next);
|
||||
}
|
||||
}
|
||||
|
||||
/* Duplicate the epsilon closure of the node ROOT_NODE.
|
||||
Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
|
||||
to their own constraint.
|
||||
|
||||
The walk starts at the original node TOP_ORG_NODE, whose counterpart
|
||||
is TOP_CLONE_NODE.  For each node visited, the edests of the clone are
|
||||
emptied and refilled with duplicates (made by duplicate_node) of the
|
||||
destinations of the original.  The constraint handed to duplicate_node
|
||||
starts as INIT_CONSTRAINT and gains the ctx_type of every ANCHOR node
|
||||
passed on the way.  ROOT_NODE is only used to detect that the walk
|
||||
has come back to its starting point.
|
||||
|
||||
Return REG_NOERROR on success, REG_ESPACE if re_node_set_insert
|
||||
reports a failure, or the error code returned by duplicate_node or
|
||||
by the recursive call.  */
|
||||
|
||||
static reg_errcode_t
|
||||
duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
|
||||
init_constraint)
|
||||
re_dfa_t *dfa;
|
||||
int top_org_node, top_clone_node, root_node;
|
||||
unsigned int init_constraint;
|
||||
{
|
||||
reg_errcode_t err;
|
||||
int org_node, clone_node, ret;
|
||||
unsigned int constraint = init_constraint;
|
||||
for (org_node = top_org_node, clone_node = top_clone_node;;)
|
||||
{
|
||||
int org_dest, clone_dest;
|
||||
if (dfa->nodes[org_node].type == OP_BACK_REF)
|
||||
{
|
||||
/* If the back reference epsilon-transits, its destination must
|
||||
|
||||
also carry the constraint.  So duplicate the node that follows
|
||||
|
||||
the back reference (its `next'), and store the duplicate as the
|
||||
|
||||
only element of the edests of the cloned back reference. */
|
||||
org_dest = dfa->nexts[org_node];
|
||||
re_node_set_empty (dfa->edests + clone_node);
|
||||
err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
return err;
|
||||
dfa->nexts[clone_node] = dfa->nexts[org_node];
|
||||
ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
|
||||
if (BE (ret < 0, 0))
|
||||
return REG_ESPACE;
|
||||
}
|
||||
else if (dfa->edests[org_node].nelem == 0)
|
||||
{
|
||||
/* The node cannot epsilon-transit: do not duplicate its
|
||||
|
||||
destination, just reuse the original `next' as the `next' of
|
||||
|
||||
the clone, and finish the walk. */
|
||||
dfa->nexts[clone_node] = dfa->nexts[org_node];
|
||||
break;
|
||||
}
|
||||
else if (dfa->edests[org_node].nelem == 1)
|
||||
{
|
||||
/* The node can epsilon-transit, and it has exactly one
|
||||
|
||||
destination. */
|
||||
org_dest = dfa->edests[org_node].elems[0];
|
||||
re_node_set_empty (dfa->edests + clone_node);
|
||||
if (dfa->nodes[org_node].type == ANCHOR)
|
||||
{
|
||||
/* An ANCHOR carries a constraint of its own; accumulate it... */
|
||||
if (org_node == root_node && clone_node != org_node)
|
||||
{
|
||||
/* ...but if the walk has reached root_node again through a
|
||||
|
||||
clone, the epsilon closure has a loop.  Then tie the clone to
|
||||
|
||||
the original destination of root_node and stop. */
|
||||
ret = re_node_set_insert (dfa->edests + clone_node,
|
||||
org_dest);
|
||||
if (BE (ret < 0, 0))
|
||||
return REG_ESPACE;
|
||||
break;
|
||||
}
|
||||
constraint |= dfa->nodes[org_node].opr.ctx_type;
|
||||
}
|
||||
err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
return err;
|
||||
ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
|
||||
if (BE (ret < 0, 0))
|
||||
return REG_ESPACE;
|
||||
}
|
||||
else /* dfa->edests[org_node].nelem == 2 */
|
||||
{
|
||||
/* The node can epsilon-transit, and it has two destinations.
|
||||
|
||||
The first is followed by a recursive call, the second by this loop. */
|
||||
org_dest = dfa->edests[org_node].elems[0];
|
||||
re_node_set_empty (dfa->edests + clone_node);
|
||||
err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
return err;
|
||||
ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
|
||||
if (BE (ret < 0, 0))
|
||||
return REG_ESPACE;
|
||||
|
||||
err = duplicate_node_closure (dfa, org_dest, clone_dest, root_node,
|
||||
constraint);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
return err;
|
||||
|
||||
org_dest = dfa->edests[org_node].elems[1];
|
||||
err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
return err;
|
||||
ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
|
||||
if (BE (ret < 0, 0))
|
||||
return REG_ESPACE;
|
||||
}
|
||||
org_node = org_dest;
|
||||
clone_node = clone_dest;
|
||||
}
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
||||
/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
|
||||
The new index will be stored in NEW_IDX and return REG_NOERROR if succeeded,
|
||||
otherwise return the error code. */
|
||||
@ -1204,50 +1276,18 @@ duplicate_node (new_idx, dfa, org_idx, constraint)
|
||||
{
|
||||
re_token_t dup;
|
||||
int dup_idx;
|
||||
reg_errcode_t err;
|
||||
|
||||
dup.type = OP_CONTEXT_NODE;
|
||||
if (dfa->nodes[org_idx].type == OP_CONTEXT_NODE)
|
||||
{
|
||||
/* If the node whose index is ORG_IDX is the same as the intended
|
||||
node, use it. */
|
||||
if (dfa->nodes[org_idx].constraint == constraint)
|
||||
{
|
||||
*new_idx = org_idx;
|
||||
return REG_NOERROR;
|
||||
}
|
||||
dup.constraint = constraint |
|
||||
dfa->nodes[org_idx].constraint;
|
||||
}
|
||||
else
|
||||
dup.constraint = constraint;
|
||||
|
||||
/* In case that `entity' points OP_CONTEXT_NODE,
|
||||
we correct `entity' to real entity in calc_inveclosures(). */
|
||||
dup.opr.ctx_info = malloc (sizeof (*dup.opr.ctx_info));
|
||||
dup = dfa->nodes[org_idx];
|
||||
dup_idx = re_dfa_add_node (dfa, dup, 1);
|
||||
if (BE (dup.opr.ctx_info == NULL || dup_idx == -1, 0))
|
||||
if (BE (dup_idx == -1, 0))
|
||||
return REG_ESPACE;
|
||||
dup.opr.ctx_info->entity = org_idx;
|
||||
dup.opr.ctx_info->bkref_eclosure = NULL;
|
||||
|
||||
dfa->nodes[dup_idx].constraint = constraint;
|
||||
if (dfa->nodes[org_idx].type == ANCHOR)
|
||||
dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type;
|
||||
dfa->nodes[dup_idx].duplicated = 1;
|
||||
dfa->firsts[dup_idx] = dfa->firsts[org_idx];
|
||||
dfa->nexts[dup_idx] = dfa->nexts[org_idx];
|
||||
err = re_node_set_init_copy (dfa->edests + dup_idx, dfa->edests + org_idx);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
return err;
|
||||
/* Since we don't duplicate epsilon nodes, the epsilon closure contains
|
||||
only the node itself. */
|
||||
err = re_node_set_init_1 (dfa->eclosures + dup_idx, dup_idx);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
return err;
|
||||
err = re_node_set_init_1 (dfa->inveclosures + dup_idx, dup_idx);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
return err;
|
||||
/* Then we must update inveclosure for this node.
|
||||
This is done in the last part of calc_eclosure(),
|
||||
since the calculation cannot be completed here. */
|
||||
re_node_set_init_empty (dfa->edests + dup_idx);
|
||||
re_node_set_init_empty (dfa->eclosures + dup_idx);
|
||||
re_node_set_init_empty (dfa->inveclosures + dup_idx);
|
||||
|
||||
*new_idx = dup_idx;
|
||||
return REG_NOERROR;
|
||||
@ -1257,7 +1297,7 @@ static void
|
||||
calc_inveclosure (dfa)
|
||||
re_dfa_t *dfa;
|
||||
{
|
||||
int src, idx, dest, entity;
|
||||
int src, idx, dest;
|
||||
for (src = 0; src < dfa->nodes_len; ++src)
|
||||
{
|
||||
for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
|
||||
@ -1265,15 +1305,6 @@ calc_inveclosure (dfa)
|
||||
dest = dfa->eclosures[src].elems[idx];
|
||||
re_node_set_insert (dfa->inveclosures + dest, src);
|
||||
}
|
||||
|
||||
entity = src;
|
||||
while (dfa->nodes[entity].type == OP_CONTEXT_NODE)
|
||||
{
|
||||
entity = dfa->nodes[entity].opr.ctx_info->entity;
|
||||
re_node_set_merge (dfa->inveclosures + src,
|
||||
dfa->inveclosures + entity);
|
||||
dfa->nodes[src].opr.ctx_info->entity = entity;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1283,16 +1314,17 @@ static reg_errcode_t
|
||||
calc_eclosure (dfa)
|
||||
re_dfa_t *dfa;
|
||||
{
|
||||
int idx, node_idx, max, incomplete = 0;
|
||||
int node_idx, incomplete;
|
||||
#ifdef DEBUG
|
||||
assert (dfa->nodes_len > 0);
|
||||
#endif
|
||||
incomplete = 0;
|
||||
/* For each nodes, calculate epsilon closure. */
|
||||
for (node_idx = 0, max = dfa->nodes_len; ; ++node_idx)
|
||||
for (node_idx = 0; ; ++node_idx)
|
||||
{
|
||||
reg_errcode_t err;
|
||||
re_node_set eclosure_elem;
|
||||
if (node_idx == max)
|
||||
if (node_idx == dfa->nodes_len)
|
||||
{
|
||||
if (!incomplete)
|
||||
break;
|
||||
@ -1301,7 +1333,6 @@ calc_eclosure (dfa)
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
assert (dfa->nodes[node_idx].type != OP_CONTEXT_NODE);
|
||||
assert (dfa->eclosures[node_idx].nelem != -1);
|
||||
#endif
|
||||
/* If we have already calculated, skip it. */
|
||||
@ -1318,41 +1349,6 @@ calc_eclosure (dfa)
|
||||
re_node_set_free (&eclosure_elem);
|
||||
}
|
||||
}
|
||||
|
||||
/* for duplicated nodes. */
|
||||
for (idx = max; idx < dfa->nodes_len; ++idx)
|
||||
{
|
||||
int entity, i, constraint;
|
||||
re_node_set *bkref_eclosure;
|
||||
entity = dfa->nodes[idx].opr.ctx_info->entity;
|
||||
re_node_set_merge (dfa->inveclosures + idx, dfa->inveclosures + entity);
|
||||
if (dfa->nodes[entity].type != OP_BACK_REF)
|
||||
continue;
|
||||
|
||||
/* If the node is a back reference, duplicate the epsilon closure of
|
||||
the next node, since it may epsilon-transit. */
|
||||
/* Note: duplicate_node() may realloc dfa->eclosures, etc. */
|
||||
bkref_eclosure = re_malloc (re_node_set, 1);
|
||||
if (BE (bkref_eclosure == NULL, 0))
|
||||
return REG_ESPACE;
|
||||
re_node_set_init_empty (bkref_eclosure);
|
||||
constraint = dfa->nodes[idx].constraint;
|
||||
for (i = 0; i < dfa->eclosures[dfa->nexts[idx]].nelem; ++i)
|
||||
{
|
||||
int dest_node_idx = dfa->eclosures[dfa->nexts[idx]].elems[i];
|
||||
if (!IS_EPSILON_NODE (dfa->nodes[dest_node_idx].type))
|
||||
{
|
||||
reg_errcode_t err;
|
||||
err = duplicate_node (&dest_node_idx, dfa, dest_node_idx,
|
||||
constraint);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
return err;
|
||||
}
|
||||
re_node_set_insert (bkref_eclosure, dest_node_idx);
|
||||
}
|
||||
dfa->nodes[idx].opr.ctx_info->bkref_eclosure = bkref_eclosure;
|
||||
}
|
||||
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
||||
@ -1366,8 +1362,9 @@ calc_eclosure_iter (new_set, dfa, node, root)
|
||||
{
|
||||
reg_errcode_t err;
|
||||
unsigned int constraint;
|
||||
int i, max, incomplete = 0;
|
||||
int i, incomplete;
|
||||
re_node_set eclosure;
|
||||
incomplete = 0;
|
||||
err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
return err;
|
||||
@ -1378,9 +1375,19 @@ calc_eclosure_iter (new_set, dfa, node, root)
|
||||
|
||||
constraint = ((dfa->nodes[node].type == ANCHOR)
|
||||
? dfa->nodes[node].opr.ctx_type : 0);
|
||||
/* If the current node has constraints, duplicate all nodes,
|
||||
since they must inherit the constraints. */
|
||||
if (constraint && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
|
||||
{
|
||||
int org_node, cur_node;
|
||||
org_node = cur_node = node;
|
||||
err = duplicate_node_closure (dfa, node, node, node, constraint);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Expand each epsilon destination nodes. */
|
||||
if (dfa->edests[node].nelem != 0)
|
||||
if (IS_EPSILON_NODE(dfa->nodes[node].type))
|
||||
for (i = 0; i < dfa->edests[node].nelem; ++i)
|
||||
{
|
||||
re_node_set eclosure_elem;
|
||||
@ -1413,28 +1420,6 @@ calc_eclosure_iter (new_set, dfa, node, root)
|
||||
}
|
||||
}
|
||||
|
||||
/* If the current node has constraints, duplicate all non-epsilon nodes,
|
||||
since they must inherit the constraints. */
|
||||
if (constraint)
|
||||
for (i = 0, max = eclosure.nelem; i < max; ++i)
|
||||
{
|
||||
int dest = eclosure.elems[i];
|
||||
if (!IS_EPSILON_NODE (dfa->nodes[dest].type))
|
||||
{
|
||||
int dup_dest;
|
||||
reg_errcode_t err;
|
||||
err = duplicate_node (&dup_dest, dfa, dest, constraint);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
return err;
|
||||
if (dest != dup_dest)
|
||||
{
|
||||
re_node_set_remove_at (&eclosure, i--);
|
||||
re_node_set_insert (&eclosure, dup_dest);
|
||||
--max;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Epsilon closures include itself. */
|
||||
re_node_set_insert (&eclosure, node);
|
||||
if (incomplete && !root)
|
||||
@ -1793,7 +1778,10 @@ parse (regexp, preg, syntax, err)
|
||||
else
|
||||
root = eor;
|
||||
if (BE (new_idx == -1 || eor == NULL || root == NULL, 0))
|
||||
return *err = REG_ESPACE, NULL;
|
||||
{
|
||||
*err = REG_ESPACE;
|
||||
return NULL;
|
||||
}
|
||||
return root;
|
||||
}
|
||||
|
||||
@ -1841,7 +1829,10 @@ parse_reg_exp (regexp, preg, token, syntax, nest, err)
|
||||
branch = NULL;
|
||||
tree = create_tree (tree, branch, 0, new_idx);
|
||||
if (BE (new_idx == -1 || tree == NULL, 0))
|
||||
return *err = REG_ESPACE, NULL;
|
||||
{
|
||||
*err = REG_ESPACE;
|
||||
return NULL;
|
||||
}
|
||||
dfa->has_plural_match = 1;
|
||||
}
|
||||
return tree;
|
||||
@ -1883,7 +1874,10 @@ parse_branch (regexp, preg, token, syntax, nest, err)
|
||||
{
|
||||
tree = create_tree (tree, exp, CONCAT, 0);
|
||||
if (tree == NULL)
|
||||
return *err = REG_ESPACE, NULL;
|
||||
{
|
||||
*err = REG_ESPACE;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
else if (tree == NULL)
|
||||
tree = exp;
|
||||
@ -1916,7 +1910,10 @@ parse_expression (regexp, preg, token, syntax, nest, err)
|
||||
new_idx = re_dfa_add_node (dfa, *token, 0);
|
||||
tree = create_tree (NULL, NULL, 0, new_idx);
|
||||
if (BE (new_idx == -1 || tree == NULL, 0))
|
||||
return *err = REG_ESPACE, NULL;
|
||||
{
|
||||
*err = REG_ESPACE;
|
||||
return NULL;
|
||||
}
|
||||
#ifdef RE_ENABLE_I18N
|
||||
if (MB_CUR_MAX > 1)
|
||||
{
|
||||
@ -1954,7 +1951,10 @@ parse_expression (regexp, preg, token, syntax, nest, err)
|
||||
new_idx = re_dfa_add_node (dfa, *token, 0);
|
||||
tree = create_tree (NULL, NULL, 0, new_idx);
|
||||
if (BE (new_idx == -1 || tree == NULL, 0))
|
||||
return *err = REG_ESPACE, NULL;
|
||||
{
|
||||
*err = REG_ESPACE;
|
||||
return NULL;
|
||||
}
|
||||
++dfa->nbackref;
|
||||
dfa->has_mb_node = 1;
|
||||
break;
|
||||
@ -1963,7 +1963,10 @@ parse_expression (regexp, preg, token, syntax, nest, err)
|
||||
case OP_DUP_QUESTION:
|
||||
case OP_OPEN_DUP_NUM:
|
||||
if (syntax & RE_CONTEXT_INVALID_OPS)
|
||||
return *err = REG_BADRPT, NULL;
|
||||
{
|
||||
*err = REG_BADRPT;
|
||||
return NULL;
|
||||
}
|
||||
else if (syntax & RE_CONTEXT_INDEP_OPS)
|
||||
{
|
||||
*token = fetch_token (regexp, syntax);
|
||||
@ -1973,7 +1976,10 @@ parse_expression (regexp, preg, token, syntax, nest, err)
|
||||
case OP_CLOSE_SUBEXP:
|
||||
if ((token->type == OP_CLOSE_SUBEXP) &&
|
||||
!(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
|
||||
return *err = REG_ERPAREN, NULL;
|
||||
{
|
||||
*err = REG_ERPAREN;
|
||||
return NULL;
|
||||
}
|
||||
/* else fall through */
|
||||
case OP_CLOSE_DUP_NUM:
|
||||
/* We treat it as a normal character. */
|
||||
@ -1983,7 +1989,10 @@ parse_expression (regexp, preg, token, syntax, nest, err)
|
||||
new_idx = re_dfa_add_node (dfa, *token, 0);
|
||||
tree = create_tree (NULL, NULL, 0, new_idx);
|
||||
if (BE (new_idx == -1 || tree == NULL, 0))
|
||||
return *err = REG_ESPACE, NULL;
|
||||
{
|
||||
*err = REG_ESPACE;
|
||||
return NULL;
|
||||
}
|
||||
break;
|
||||
case ANCHOR:
|
||||
if (dfa->word_char == NULL)
|
||||
@ -2008,7 +2017,10 @@ parse_expression (regexp, preg, token, syntax, nest, err)
|
||||
if (BE (idx_first == -1 || idx_last == -1 || new_idx == -1
|
||||
|| tree_first == NULL || tree_last == NULL
|
||||
|| tree == NULL, 0))
|
||||
return *err = REG_ESPACE, NULL;
|
||||
{
|
||||
*err = REG_ESPACE;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -2027,7 +2039,10 @@ parse_expression (regexp, preg, token, syntax, nest, err)
|
||||
new_idx = re_dfa_add_node (dfa, *token, 0);
|
||||
tree = create_tree (NULL, NULL, 0, new_idx);
|
||||
if (BE (new_idx == -1 || tree == NULL, 0))
|
||||
return *err = REG_ESPACE, NULL;
|
||||
{
|
||||
*err = REG_ESPACE;
|
||||
return NULL;
|
||||
}
|
||||
if (MB_CUR_MAX > 1)
|
||||
dfa->has_mb_node = 1;
|
||||
break;
|
||||
@ -2108,7 +2123,10 @@ parse_sub_exp (regexp, preg, token, syntax, nest, err)
|
||||
new_idx = re_dfa_add_node (dfa, *token, 0);
|
||||
left_par = create_tree (NULL, NULL, 0, new_idx);
|
||||
if (BE (new_idx == -1 || left_par == NULL, 0))
|
||||
return *err = REG_ESPACE, NULL;
|
||||
{
|
||||
*err = REG_ESPACE;
|
||||
return NULL;
|
||||
}
|
||||
dfa->nodes[new_idx].opr.idx = cur_nsub;
|
||||
*token = fetch_token (regexp, syntax);
|
||||
|
||||
@ -2134,7 +2152,10 @@ parse_sub_exp (regexp, preg, token, syntax, nest, err)
|
||||
: create_tree (tree, right_par, CONCAT, 0));
|
||||
tree = create_tree (left_par, tree, CONCAT, 0);
|
||||
if (BE (new_idx == -1 || right_par == NULL || tree == NULL, 0))
|
||||
return *err = REG_ESPACE, NULL;
|
||||
{
|
||||
*err = REG_ESPACE;
|
||||
return NULL;
|
||||
}
|
||||
dfa->nodes[new_idx].opr.idx = cur_nsub;
|
||||
|
||||
return tree;
|
||||
@ -2252,7 +2273,10 @@ parse_dup_op (dup_elem, regexp, dfa, token, syntax, err)
|
||||
work_tree = duplicate_tree (elem, dfa);
|
||||
tree = create_tree (tree, work_tree, CONCAT, 0);
|
||||
if (BE (work_tree == NULL || tree == NULL, 0))
|
||||
return *err = REG_ESPACE, NULL;
|
||||
{
|
||||
*err = REG_ESPACE;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2261,7 +2285,10 @@ parse_dup_op (dup_elem, regexp, dfa, token, syntax, err)
|
||||
new_idx = re_dfa_add_node (dfa, *token, 0);
|
||||
tree = create_tree (tree, NULL, 0, new_idx);
|
||||
if (BE (new_idx == -1 || tree == NULL, 0))
|
||||
return *err = REG_ESPACE, NULL;
|
||||
{
|
||||
*err = REG_ESPACE;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
*token = fetch_token (regexp, syntax);
|
||||
return tree;
|
||||
|
@ -929,20 +929,18 @@ re_dfa_add_node (dfa, token, mode)
|
||||
dfa->nodes = new_array;
|
||||
if (mode)
|
||||
{
|
||||
int *new_firsts, *new_nexts;
|
||||
int *new_nexts;
|
||||
re_node_set *new_edests, *new_eclosures, *new_inveclosures;
|
||||
|
||||
new_firsts = re_realloc (dfa->firsts, int, dfa->nodes_alloc);
|
||||
new_nexts = re_realloc (dfa->nexts, int, dfa->nodes_alloc);
|
||||
new_edests = re_realloc (dfa->edests, re_node_set, dfa->nodes_alloc);
|
||||
new_eclosures = re_realloc (dfa->eclosures, re_node_set,
|
||||
dfa->nodes_alloc);
|
||||
new_inveclosures = re_realloc (dfa->inveclosures, re_node_set,
|
||||
dfa->nodes_alloc);
|
||||
if (BE (new_firsts == NULL || new_nexts == NULL || new_edests == NULL
|
||||
if (BE (new_nexts == NULL || new_edests == NULL
|
||||
|| new_eclosures == NULL || new_inveclosures == NULL, 0))
|
||||
return -1;
|
||||
dfa->firsts = new_firsts;
|
||||
dfa->nexts = new_nexts;
|
||||
dfa->edests = new_edests;
|
||||
dfa->eclosures = new_eclosures;
|
||||
@ -951,6 +949,7 @@ re_dfa_add_node (dfa, token, mode)
|
||||
}
|
||||
dfa->nodes[dfa->nodes_len] = token;
|
||||
dfa->nodes[dfa->nodes_len].duplicated = 0;
|
||||
dfa->nodes[dfa->nodes_len].constraint = 0;
|
||||
return dfa->nodes_len++;
|
||||
}
|
||||
|
||||
@ -1126,7 +1125,7 @@ create_ci_newstate (dfa, nodes, hash)
|
||||
{
|
||||
re_token_t *node = dfa->nodes + nodes->elems[i];
|
||||
re_token_type_t type = node->type;
|
||||
if (type == CHARACTER)
|
||||
if (type == CHARACTER && !node->constraint)
|
||||
continue;
|
||||
|
||||
/* If the state has the halt node, the state is a halt state. */
|
||||
@ -1139,13 +1138,8 @@ create_ci_newstate (dfa, nodes, hash)
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
else if (type == OP_BACK_REF)
|
||||
newstate->has_backref = 1;
|
||||
else if (type == ANCHOR || OP_CONTEXT_NODE)
|
||||
{
|
||||
newstate->has_constraint = 1;
|
||||
if (type == OP_CONTEXT_NODE
|
||||
&& dfa->nodes[node->opr.ctx_info->entity].type == END_OF_RE)
|
||||
newstate->halt = 1;
|
||||
}
|
||||
else if (type == ANCHOR || node->constraint)
|
||||
newstate->has_constraint = 1;
|
||||
}
|
||||
err = register_state (dfa, newstate, hash);
|
||||
return (err != REG_NOERROR) ? NULL : newstate;
|
||||
@ -1175,9 +1169,11 @@ create_cd_newstate (dfa, nodes, context, hash)
|
||||
unsigned int constraint = 0;
|
||||
re_token_t *node = dfa->nodes + nodes->elems[i];
|
||||
re_token_type_t type = node->type;
|
||||
if (type == CHARACTER)
|
||||
continue;
|
||||
if (node->constraint)
|
||||
constraint = node->constraint;
|
||||
|
||||
if (type == CHARACTER && !constraint)
|
||||
continue;
|
||||
/* If the state has the halt node, the state is a halt state. */
|
||||
else if (type == END_OF_RE)
|
||||
newstate->halt = 1;
|
||||
@ -1190,20 +1186,6 @@ create_cd_newstate (dfa, nodes, context, hash)
|
||||
newstate->has_backref = 1;
|
||||
else if (type == ANCHOR)
|
||||
constraint = node->opr.ctx_type;
|
||||
else if (type == OP_CONTEXT_NODE)
|
||||
{
|
||||
re_token_type_t ctype = dfa->nodes[node->opr.ctx_info->entity].type;
|
||||
constraint = node->constraint;
|
||||
if (ctype == END_OF_RE)
|
||||
newstate->halt = 1;
|
||||
else if (ctype == OP_BACK_REF)
|
||||
newstate->has_backref = 1;
|
||||
#ifdef RE_ENABLE_I18N
|
||||
else if (ctype == COMPLEX_BRACKET
|
||||
|| (type == OP_PERIOD && MB_CUR_MAX > 1))
|
||||
newstate->accept_mb = 1;
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
}
|
||||
|
||||
if (constraint)
|
||||
{
|
||||
|
@ -133,7 +133,6 @@ typedef enum
|
||||
OP_DUP_QUESTION,
|
||||
OP_BACK_REF,
|
||||
ANCHOR,
|
||||
OP_CONTEXT_NODE,
|
||||
|
||||
/* Dummy marker. */
|
||||
END_OF_RE_TOKEN_T
|
||||
@ -198,11 +197,6 @@ typedef struct
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
int idx; /* for BACK_REF */
|
||||
re_context_type ctx_type; /* for ANCHOR */
|
||||
struct
|
||||
{
|
||||
int entity; /* for OP_CONTEXT_NODE, index of the entity */
|
||||
re_node_set *bkref_eclosure;
|
||||
} *ctx_info;
|
||||
} opr;
|
||||
#if __GNUC__ >= 2
|
||||
re_token_type_t type : 8;
|
||||
@ -474,7 +468,6 @@ struct re_dfa_t
|
||||
int nodes_alloc;
|
||||
int nodes_len;
|
||||
bin_tree_t *str_tree;
|
||||
int *firsts;
|
||||
int *nexts;
|
||||
re_node_set *edests;
|
||||
re_node_set *eclosures;
|
||||
@ -519,6 +512,8 @@ static int re_node_set_compare (const re_node_set *set1,
|
||||
const re_node_set *set2);
|
||||
static int re_node_set_contains (const re_node_set *set, int elem);
|
||||
static void re_node_set_remove_at (re_node_set *set, int idx);
|
||||
#define re_node_set_remove(set,id) \
|
||||
(re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
|
||||
#define re_node_set_empty(p) ((p)->nelem = 0)
|
||||
#define re_node_set_free(set) re_free ((set)->elems)
|
||||
static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token, int mode);
|
||||
|
236
posix/regexec.c
236
posix/regexec.c
@ -95,6 +95,8 @@ static reg_errcode_t set_regs (const regex_t *preg,
|
||||
const re_match_context_t *mctx,
|
||||
size_t nmatch, regmatch_t *pmatch,
|
||||
int fl_backtrack);
|
||||
static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
|
||||
|
||||
#ifdef RE_ENABLE_I18N
|
||||
static int sift_states_iter_mb (const regex_t *preg,
|
||||
const re_match_context_t *mctx,
|
||||
@ -845,11 +847,8 @@ check_matching (preg, mctx, fl_search, fl_longest_match)
|
||||
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
|
||||
for (i = 0; i < cur_state->nodes.nelem; ++i)
|
||||
{
|
||||
re_token_type_t type;
|
||||
int node = cur_state->nodes.elems[i];
|
||||
int entity = (dfa->nodes[node].type != OP_CONTEXT_NODE ? node
|
||||
: dfa->nodes[node].opr.ctx_info->entity);
|
||||
type = dfa->nodes[entity].type;
|
||||
re_token_type_t type = dfa->nodes[node].type;
|
||||
if (type == OP_BACK_REF)
|
||||
{
|
||||
int clexp_idx;
|
||||
@ -859,7 +858,7 @@ check_matching (preg, mctx, fl_search, fl_longest_match)
|
||||
re_token_t *clexp_node;
|
||||
clexp_node = dfa->nodes + cur_state->nodes.elems[clexp_idx];
|
||||
if (clexp_node->type == OP_CLOSE_SUBEXP
|
||||
&& clexp_node->opr.idx + 1== dfa->nodes[entity].opr.idx)
|
||||
&& clexp_node->opr.idx + 1== dfa->nodes[node].opr.idx)
|
||||
{
|
||||
err = match_ctx_add_entry (mctx, node, 0, 0, 0);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
@ -955,15 +954,13 @@ static int check_halt_node_context (dfa, node, context)
|
||||
int node;
|
||||
unsigned int context;
|
||||
{
|
||||
int entity;
|
||||
re_token_type_t type = dfa->nodes[node].type;
|
||||
if (type == END_OF_RE)
|
||||
return 1;
|
||||
if (type != OP_CONTEXT_NODE)
|
||||
unsigned int constraint = dfa->nodes[node].constraint;
|
||||
if (type != END_OF_RE)
|
||||
return 0;
|
||||
entity = dfa->nodes[node].opr.ctx_info->entity;
|
||||
if (dfa->nodes[entity].type != END_OF_RE
|
||||
|| NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[node].constraint, context))
|
||||
if (!constraint)
|
||||
return 1;
|
||||
if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
@ -1008,39 +1005,27 @@ proceed_next_node (preg, nregs, regs, mctx, pidx, node, eps_via_nodes, fs)
|
||||
struct re_fail_stack_t *fs;
|
||||
{
|
||||
re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
|
||||
int i, err, dest_node, cur_entity;
|
||||
int i, err, dest_node;
|
||||
dest_node = -1;
|
||||
cur_entity = ((dfa->nodes[node].type == OP_CONTEXT_NODE)
|
||||
? dfa->nodes[node].opr.ctx_info->entity : node);
|
||||
if (IS_EPSILON_NODE (dfa->nodes[node].type))
|
||||
{
|
||||
int ndest, dest_nodes[2], dest_entities[2];
|
||||
re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
|
||||
int ndest, dest_nodes[2];
|
||||
err = re_node_set_insert (eps_via_nodes, node);
|
||||
if (BE (err < 0, 0))
|
||||
return -1;
|
||||
/* Pick up valid destinations. */
|
||||
for (ndest = 0, i = 0; i < mctx->state_log[*pidx]->nodes.nelem; ++i)
|
||||
for (ndest = 0, i = 0; i < dfa->edests[node].nelem; ++i)
|
||||
{
|
||||
int candidate = mctx->state_log[*pidx]->nodes.elems[i];
|
||||
int entity;
|
||||
entity = ((dfa->nodes[candidate].type == OP_CONTEXT_NODE)
|
||||
? dfa->nodes[candidate].opr.ctx_info->entity : candidate);
|
||||
if (!re_node_set_contains (dfa->edests + node, entity))
|
||||
int candidate = dfa->edests[node].elems[i];
|
||||
if (!re_node_set_contains (cur_nodes, candidate))
|
||||
continue;
|
||||
dest_nodes[0] = (ndest == 0) ? candidate : dest_nodes[0];
|
||||
dest_entities[0] = (ndest == 0) ? entity : dest_entities[0];
|
||||
dest_nodes[1] = (ndest == 1) ? candidate : dest_nodes[1];
|
||||
dest_entities[1] = (ndest == 1) ? entity : dest_entities[1];
|
||||
++ndest;
|
||||
}
|
||||
if (ndest <= 1)
|
||||
return ndest == 0 ? -1 : (ndest == 1 ? dest_nodes[0] : 0);
|
||||
if (dest_entities[0] > dest_entities[1])
|
||||
{
|
||||
int swap_work = dest_nodes[0];
|
||||
dest_nodes[0] = dest_nodes[1];
|
||||
dest_nodes[1] = swap_work;
|
||||
}
|
||||
/* In order to avoid infinite loop like "(a*)*". */
|
||||
if (re_node_set_contains (eps_via_nodes, dest_nodes[0]))
|
||||
return dest_nodes[1];
|
||||
@ -1050,22 +1035,17 @@ proceed_next_node (preg, nregs, regs, mctx, pidx, node, eps_via_nodes, fs)
|
||||
}
|
||||
else
|
||||
{
|
||||
int naccepted = 0, entity = node;
|
||||
int naccepted = 0;
|
||||
re_token_type_t type = dfa->nodes[node].type;
|
||||
if (type == OP_CONTEXT_NODE)
|
||||
{
|
||||
entity = dfa->nodes[node].opr.ctx_info->entity;
|
||||
type = dfa->nodes[entity].type;
|
||||
}
|
||||
|
||||
#ifdef RE_ENABLE_I18N
|
||||
if (ACCEPT_MB_NODE (type))
|
||||
naccepted = check_node_accept_bytes (preg, entity, mctx->input, *pidx);
|
||||
naccepted = check_node_accept_bytes (preg, node, mctx->input, *pidx);
|
||||
else
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
if (type == OP_BACK_REF)
|
||||
{
|
||||
int subexp_idx = dfa->nodes[entity].opr.idx;
|
||||
int subexp_idx = dfa->nodes[node].opr.idx;
|
||||
naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
|
||||
if (fs != NULL)
|
||||
{
|
||||
@ -1085,18 +1065,10 @@ proceed_next_node (preg, nregs, regs, mctx, pidx, node, eps_via_nodes, fs)
|
||||
err = re_node_set_insert (eps_via_nodes, node);
|
||||
if (BE (err < 0, 0))
|
||||
return -2;
|
||||
dest_node = dfa->nexts[node];
|
||||
dest_node = dfa->edests[node].elems[0];
|
||||
if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
|
||||
dest_node))
|
||||
return dest_node;
|
||||
for (i = 0; i < mctx->state_log[*pidx]->nodes.nelem; ++i)
|
||||
{
|
||||
dest_node = mctx->state_log[*pidx]->nodes.elems[i];
|
||||
if ((dfa->nodes[dest_node].type == OP_CONTEXT_NODE
|
||||
&& (dfa->nexts[node]
|
||||
== dfa->nodes[dest_node].opr.ctx_info->entity)))
|
||||
return dest_node;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1153,6 +1125,7 @@ pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes)
|
||||
*pidx = fs->stack[num].idx;
|
||||
memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
|
||||
re_node_set_free (eps_via_nodes);
|
||||
re_free (fs->stack[num].regs);
|
||||
*eps_via_nodes = fs->stack[num].eps_via_nodes;
|
||||
return fs->stack[num].node;
|
||||
}
|
||||
@ -1201,12 +1174,18 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
|
||||
if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
|
||||
break;
|
||||
if (reg_idx == nmatch)
|
||||
return REG_NOERROR;
|
||||
{
|
||||
re_node_set_free (&eps_via_nodes);
|
||||
return free_fail_stack_return (fs);
|
||||
}
|
||||
cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
|
||||
&eps_via_nodes);
|
||||
}
|
||||
else
|
||||
return REG_NOERROR;
|
||||
{
|
||||
re_node_set_free (&eps_via_nodes);
|
||||
return REG_NOERROR;
|
||||
}
|
||||
}
|
||||
|
||||
/* Proceed to next node. */
|
||||
@ -1221,10 +1200,30 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
|
||||
cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
|
||||
&eps_via_nodes);
|
||||
else
|
||||
return REG_NOMATCH;
|
||||
{
|
||||
re_node_set_free (&eps_via_nodes);
|
||||
return REG_NOMATCH;
|
||||
}
|
||||
}
|
||||
}
|
||||
re_node_set_free (&eps_via_nodes);
|
||||
return free_fail_stack_return (fs);
|
||||
}
|
||||
|
||||
/* Release the storage held by the fail stack FS and return REG_NOERROR.
   For each of the FS->num entries, the elements of its eps_via_nodes
   node set and its saved regs array are freed, and then the entry array
   FS->stack itself is freed.  FS may be NULL, in which case nothing is
   freed.  The FS structure itself is not freed here.
   The constant return value lets a caller finish with
   `return free_fail_stack_return (fs);' on its success paths.  */
static reg_errcode_t
|
||||
free_fail_stack_return (fs)
|
||||
struct re_fail_stack_t *fs;
|
||||
{
|
||||
/* A NULL FS means there is no fail stack to release.  */
if (fs)
|
||||
{
|
||||
int fs_idx;
|
||||
/* Free the per-entry resources of every entry still on the stack.  */
for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
|
||||
{
|
||||
re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
|
||||
re_free (fs->stack[fs_idx].regs);
|
||||
}
|
||||
/* Finally free the array that held the entries.  */
re_free (fs->stack);
|
||||
}
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
||||
@ -1314,6 +1313,7 @@ sift_states_backward (preg, mctx, sctx)
|
||||
{
|
||||
memset (sctx->sifted_states, '\0',
|
||||
sizeof (re_dfastate_t *) * str_idx);
|
||||
re_node_set_free (&cur_dest);
|
||||
return REG_NOERROR;
|
||||
}
|
||||
re_node_set_empty (&cur_dest);
|
||||
@ -1330,22 +1330,16 @@ sift_states_backward (preg, mctx, sctx)
|
||||
for (i = 0; i < cur_src->nelem; i++)
|
||||
{
|
||||
int prev_node = cur_src->elems[i];
|
||||
int entity = prev_node;
|
||||
int naccepted = 0;
|
||||
re_token_type_t type = dfa->nodes[prev_node].type;
|
||||
|
||||
if (IS_EPSILON_NODE(type))
|
||||
continue;
|
||||
if (type == OP_CONTEXT_NODE)
|
||||
{
|
||||
entity = dfa->nodes[prev_node].opr.ctx_info->entity;
|
||||
type = dfa->nodes[entity].type;
|
||||
}
|
||||
#ifdef RE_ENABLE_I18N
|
||||
/* If the node may accept `multi byte'. */
|
||||
if (ACCEPT_MB_NODE (type))
|
||||
naccepted = sift_states_iter_mb (preg, mctx, sctx, entity, str_idx,
|
||||
sctx->last_str_idx);
|
||||
naccepted = sift_states_iter_mb (preg, mctx, sctx, prev_node,
|
||||
str_idx, sctx->last_str_idx);
|
||||
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
/* We don't check backreferences here.
|
||||
@ -1459,12 +1453,15 @@ update_cur_sifted_state (preg, mctx, sctx, str_idx, dest_nodes)
|
||||
|
||||
/* At first, add the nodes which can epsilon transit to a node in
|
||||
DEST_NODE. */
|
||||
err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
return err;
|
||||
if (dest_nodes->nelem)
|
||||
{
|
||||
err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Then, check the limitations in the current sift_context. */
|
||||
if (sctx->limits.nelem)
|
||||
if (dest_nodes->nelem && sctx->limits.nelem)
|
||||
{
|
||||
err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
|
||||
mctx->bkref_ents, str_idx);
|
||||
@ -1479,7 +1476,7 @@ update_cur_sifted_state (preg, mctx, sctx, str_idx, dest_nodes)
|
||||
|
||||
/* If we are searching for the subexpression candidates.
|
||||
Note that we were from transit_state_bkref_loop() in this case. */
|
||||
if (sctx->check_subexp)
|
||||
if (dest_nodes->nelem && sctx->check_subexp)
|
||||
{
|
||||
err = search_subexp (preg, mctx, sctx, str_idx, dest_nodes);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
@ -1538,7 +1535,7 @@ sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates)
|
||||
int cur_node = inv_eclosure->elems[ecl_idx];
|
||||
if (cur_node == node)
|
||||
continue;
|
||||
if (dfa->edests[cur_node].nelem)
|
||||
if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
|
||||
{
|
||||
int edst1 = dfa->edests[cur_node].elems[0];
|
||||
int edst2 = ((dfa->edests[cur_node].nelem > 1)
|
||||
@ -1580,12 +1577,10 @@ check_dst_limits (dfa, limits, mctx, dst_node, dst_idx, src_node, src_idx)
|
||||
|
||||
for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
|
||||
{
|
||||
int bkref, subexp_idx/*, node_idx, cls_node*/;
|
||||
int subexp_idx;
|
||||
struct re_backref_cache_entry *ent;
|
||||
ent = mctx->bkref_ents + limits->elems[lim_idx];
|
||||
bkref = (dfa->nodes[ent->node].type == OP_CONTEXT_NODE
|
||||
? dfa->nodes[ent->node].opr.ctx_info->entity : ent->node);
|
||||
subexp_idx = dfa->nodes[bkref].opr.idx - 1;
|
||||
subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
|
||||
|
||||
dst_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx],
|
||||
dfa->eclosures + dst_node,
|
||||
@ -1624,13 +1619,7 @@ check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, node,
|
||||
for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
|
||||
{
|
||||
int node = eclosures->elems[node_idx];
|
||||
int entity = node;
|
||||
re_token_type_t type= dfa->nodes[node].type;
|
||||
if (type == OP_CONTEXT_NODE)
|
||||
{
|
||||
entity = dfa->nodes[node].opr.ctx_info->entity;
|
||||
type = dfa->nodes[entity].type;
|
||||
}
|
||||
if (type == OP_BACK_REF)
|
||||
{
|
||||
int bi;
|
||||
@ -1641,7 +1630,7 @@ check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, node,
|
||||
&& ent->str_idx == str_idx)
|
||||
{
|
||||
int cpos, dst;
|
||||
dst = dfa->nexts[node];
|
||||
dst = dfa->edests[node].elems[0];
|
||||
cpos = check_dst_limits_calc_pos (dfa, mctx, limit,
|
||||
dfa->eclosures + dst,
|
||||
subexp_idx, dst,
|
||||
@ -1685,17 +1674,14 @@ check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
|
||||
|
||||
for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
|
||||
{
|
||||
int bkref, subexp_idx;
|
||||
int subexp_idx;
|
||||
struct re_backref_cache_entry *ent;
|
||||
ent = bkref_ents + limits->elems[lim_idx];
|
||||
|
||||
if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
|
||||
continue; /* This is unrelated limitation. */
|
||||
|
||||
bkref = (dfa->nodes[ent->node].type == OP_CONTEXT_NODE
|
||||
? dfa->nodes[ent->node].opr.ctx_info->entity : ent->node);
|
||||
subexp_idx = dfa->nodes[bkref].opr.idx - 1;
|
||||
|
||||
subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
|
||||
if (ent->subexp_to == str_idx)
|
||||
{
|
||||
int ops_node = -1;
|
||||
@ -1790,12 +1776,8 @@ search_subexp (preg, mctx, sctx, str_idx, dest_nodes)
|
||||
for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
|
||||
{
|
||||
re_token_type_t type;
|
||||
int entity;
|
||||
node = dest_nodes->elems[node_idx];
|
||||
type = dfa->nodes[node].type;
|
||||
entity = (type != OP_CONTEXT_NODE ? node
|
||||
: dfa->nodes[node].opr.ctx_info->entity);
|
||||
type = (type != OP_CONTEXT_NODE ? type : dfa->nodes[entity].type);
|
||||
|
||||
if (type == OP_CLOSE_SUBEXP
|
||||
&& sctx->check_subexp == dfa->nodes[node].opr.idx + 1)
|
||||
@ -1933,14 +1915,10 @@ sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes)
|
||||
|
||||
for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
|
||||
{
|
||||
int entity;
|
||||
int cur_bkref_idx = re_string_cur_idx (mctx->input);
|
||||
re_token_type_t type;
|
||||
node = candidates->elems[node_idx];
|
||||
type = dfa->nodes[node].type;
|
||||
entity = (type != OP_CONTEXT_NODE ? node
|
||||
: dfa->nodes[node].opr.ctx_info->entity);
|
||||
type = (type != OP_CONTEXT_NODE ? type : dfa->nodes[entity].type);
|
||||
if (node == sctx->cur_bkref && str_idx == cur_bkref_idx)
|
||||
continue;
|
||||
/* Avoid infinite loop for the REs like "()\1+". */
|
||||
@ -1951,37 +1929,25 @@ sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes)
|
||||
int enabled_idx;
|
||||
for (enabled_idx = 0; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
|
||||
{
|
||||
int disabled_idx, subexp_len, to_idx;
|
||||
int disabled_idx, subexp_len, to_idx, dst_node;
|
||||
struct re_backref_cache_entry *entry;
|
||||
entry = mctx->bkref_ents + enabled_idx;
|
||||
subexp_len = entry->subexp_to - entry->subexp_from;
|
||||
to_idx = str_idx + subexp_len;
|
||||
dst_node = (subexp_len ? dfa->nexts[node]
|
||||
: dfa->edests[node].elems[0]);
|
||||
|
||||
if (entry->node != node || entry->str_idx != str_idx
|
||||
|| to_idx > sctx->last_str_idx
|
||||
|| sctx->sifted_states[to_idx] == NULL)
|
||||
continue;
|
||||
if (!STATE_NODE_CONTAINS (sctx->sifted_states[to_idx],
|
||||
dfa->nexts[node]))
|
||||
{
|
||||
int dst_idx;
|
||||
re_node_set *dsts = &sctx->sifted_states[to_idx]->nodes;
|
||||
for (dst_idx = 0; dst_idx < dsts->nelem; ++dst_idx)
|
||||
{
|
||||
int dst_node = dsts->elems[dst_idx];
|
||||
if (dfa->nodes[dst_node].type == OP_CONTEXT_NODE
|
||||
&& (dfa->nodes[dst_node].opr.ctx_info->entity
|
||||
== dfa->nexts[node]))
|
||||
break;
|
||||
}
|
||||
if (dst_idx == dsts->nelem)
|
||||
continue;
|
||||
}
|
||||
if (!STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node))
|
||||
continue;
|
||||
|
||||
if (check_dst_limits (dfa, &sctx->limits, mctx, node,
|
||||
str_idx, dfa->nexts[node], to_idx))
|
||||
continue;
|
||||
if (sctx->check_subexp == dfa->nodes[entity].opr.idx)
|
||||
if (sctx->check_subexp == dfa->nodes[node].opr.idx)
|
||||
{
|
||||
char *buf;
|
||||
buf = (char *) re_string_get_buffer (mctx->input);
|
||||
@ -2038,9 +2004,10 @@ sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes)
|
||||
return err;
|
||||
}
|
||||
local_sctx.sifted_states[str_idx] = cur_state;
|
||||
re_node_set_remove_at (&local_sctx.limits,
|
||||
local_sctx.limits.nelem - 1);
|
||||
entry->flag = 1;
|
||||
re_node_set_remove (&local_sctx.limits, enabled_idx);
|
||||
/* We must not use the variable entry here, since
|
||||
mctx->bkref_ents might be realloced. */
|
||||
mctx->bkref_ents[enabled_idx].flag = 1;
|
||||
}
|
||||
}
|
||||
for (enabled_idx = 0; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
|
||||
@ -2301,15 +2268,14 @@ transit_state_mb (preg, pstate, mctx)
|
||||
unsigned int context;
|
||||
re_dfastate_t *dest_state;
|
||||
|
||||
if (dfa->nodes[cur_node_idx].type == OP_CONTEXT_NODE)
|
||||
if (dfa->nodes[cur_node_idx].constraint)
|
||||
{
|
||||
context = re_string_context_at (mctx->input,
|
||||
re_string_cur_idx (mctx->input),
|
||||
mctx->eflags, preg->newline_anchor);
|
||||
if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
|
||||
context))
|
||||
context))
|
||||
continue;
|
||||
cur_node_idx = dfa->nodes[cur_node_idx].opr.ctx_info->entity;
|
||||
}
|
||||
|
||||
/* How many bytes the node can accepts? */
|
||||
@ -2404,17 +2370,16 @@ transit_state_bkref_loop (preg, nodes, work_state_log, mctx)
|
||||
/* Check whether `node' is a backreference or not. */
|
||||
if (node->type == OP_BACK_REF)
|
||||
subexp_idx = node->opr.idx;
|
||||
else if (node->type == OP_CONTEXT_NODE &&
|
||||
dfa->nodes[node->opr.ctx_info->entity].type == OP_BACK_REF)
|
||||
else
|
||||
continue;
|
||||
|
||||
if (node->constraint)
|
||||
{
|
||||
context = re_string_context_at (mctx->input, cur_str_idx,
|
||||
mctx->eflags, preg->newline_anchor);
|
||||
if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
|
||||
continue;
|
||||
subexp_idx = dfa->nodes[node->opr.ctx_info->entity].opr.idx;
|
||||
}
|
||||
else
|
||||
continue;
|
||||
|
||||
/* `node' is a backreference.
|
||||
Check the substring which the substring matched. */
|
||||
@ -2440,8 +2405,8 @@ transit_state_bkref_loop (preg, nodes, work_state_log, mctx)
|
||||
if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
|
||||
continue;
|
||||
subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
|
||||
new_dest_nodes = ((node->type == OP_CONTEXT_NODE && subexp_len == 0)
|
||||
? dfa->nodes[node_idx].opr.ctx_info->bkref_eclosure
|
||||
new_dest_nodes = (subexp_len == 0
|
||||
? dfa->eclosures + dfa->edests[node_idx].elems[0]
|
||||
: dfa->eclosures + dfa->nexts[node_idx]);
|
||||
dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
|
||||
- bkref_ent->subexp_from);
|
||||
@ -2688,16 +2653,9 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
|
||||
/* For all the nodes belonging to `state', */
|
||||
for (i = 0; i < cur_nodes->nelem; ++i)
|
||||
{
|
||||
unsigned int constraint = 0;
|
||||
re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
|
||||
re_token_type_t type = node->type;
|
||||
|
||||
if (type == OP_CONTEXT_NODE)
|
||||
{
|
||||
constraint = node->constraint;
|
||||
node = dfa->nodes + node->opr.ctx_info->entity;
|
||||
type = node->type;
|
||||
}
|
||||
unsigned int constraint = node->constraint;
|
||||
|
||||
/* Enumerate all single byte character this node can accept. */
|
||||
if (type == CHARACTER)
|
||||
@ -2724,10 +2682,10 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
|
||||
if (constraint & NEXT_WORD_CONSTRAINT)
|
||||
for (j = 0; j < BITSET_UINTS; ++j)
|
||||
accepts[j] &= dfa->word_char[j];
|
||||
else if (constraint & NEXT_NOTWORD_CONSTRAINT)
|
||||
if (constraint & NEXT_NOTWORD_CONSTRAINT)
|
||||
for (j = 0; j < BITSET_UINTS; ++j)
|
||||
accepts[j] &= ~dfa->word_char[j];
|
||||
else if (constraint & NEXT_NEWLINE_CONSTRAINT)
|
||||
if (constraint & NEXT_NEWLINE_CONSTRAINT)
|
||||
{
|
||||
int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
|
||||
bitset_empty (accepts);
|
||||
@ -3058,10 +3016,8 @@ check_node_accept (preg, node, mctx, idx)
|
||||
const re_match_context_t *mctx;
|
||||
int idx;
|
||||
{
|
||||
const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
|
||||
const re_token_t *cur_node;
|
||||
unsigned char ch;
|
||||
if (node->type == OP_CONTEXT_NODE)
|
||||
if (node->constraint)
|
||||
{
|
||||
/* The node has constraints. Check whether the current context
|
||||
satisfies the constraints. */
|
||||
@ -3070,17 +3026,13 @@ check_node_accept (preg, node, mctx, idx)
|
||||
preg->newline_anchor);
|
||||
if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
|
||||
return 0;
|
||||
cur_node = dfa->nodes + node->opr.ctx_info->entity;
|
||||
}
|
||||
else
|
||||
cur_node = node;
|
||||
|
||||
ch = re_string_byte_at (mctx->input, idx);
|
||||
if (cur_node->type == CHARACTER)
|
||||
return cur_node->opr.c == ch;
|
||||
else if (cur_node->type == SIMPLE_BRACKET)
|
||||
return bitset_contain (cur_node->opr.sbcset, ch);
|
||||
else if (cur_node->type == OP_PERIOD)
|
||||
if (node->type == CHARACTER)
|
||||
return node->opr.c == ch;
|
||||
else if (node->type == SIMPLE_BRACKET)
|
||||
return bitset_contain (node->opr.sbcset, ch);
|
||||
else if (node->type == OP_PERIOD)
|
||||
return !((ch == '\n' && !(preg->syntax & RE_DOT_NEWLINE))
|
||||
|| (ch == '\0' && (preg->syntax & RE_DOT_NOT_NULL)));
|
||||
else
|
||||
@ -3221,5 +3173,7 @@ sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx,
|
||||
sctx->last_node = last_node;
|
||||
sctx->last_str_idx = last_str_idx;
|
||||
sctx->check_subexp = check_subexp;
|
||||
sctx->cur_bkref = -1;
|
||||
sctx->cls_subexp_idx = -1;
|
||||
re_node_set_init_empty (&sctx->limits);
|
||||
}
|
||||
|
@ -290,8 +290,8 @@ asm (".L__X'%ebx = 1\n\t"
|
||||
#undef INLINE_SYSCALL
|
||||
#define INLINE_SYSCALL(name, nr, args...) \
|
||||
({ \
|
||||
unsigned int resultvar = INTERNAL_SYSCALL(name, nr, args); \
|
||||
if (INTERNAL_SYSCALL_ERROR_P (resultvar)) \
|
||||
unsigned int resultvar = INTERNAL_SYSCALL (name, nr, args); \
|
||||
if (__builtin_expect (INTERNAL_SYSCALL_ERROR_P (resultvar), 0)) \
|
||||
{ \
|
||||
__set_errno (INTERNAL_SYSCALL_ERRNO (resultvar)); \
|
||||
resultvar = 0xffffffff; \
|
||||
|
Loading…
Reference in New Issue
Block a user