mirror of
git://sourceware.org/git/glibc.git
synced 2024-11-27 03:41:23 +08:00
Update.
2002-02-28 Isamu Hasegawa <isamu@yamato.ibm.com> * posix/regcomp.c (regcomp): Remove a redundant condition. (init_word_char): Add a check on malloc failure. (create_initial_state): Likewise. (duplicate_node): Likewise. (calc_eclosure): Likewise. (calc_eclosure_iter): Likewise. (parse_expression): Likewise. (parse_bracket_exp): Remove unnecessary malloc invocations. (build_equiv_class): Likewise. (build_charclass): Likewise. * posix/regex_internal.c (re_node_set_intersect): Add a check on malloc failure. (re_node_set_add_intersect): Likewise. (re_node_set_merge): Likewise. (re_acquire_state): Likewise. (re_acquire_state_context): Likewise. (create_newstate_common): Likewise. (register_state): Likewise. (create_ci_newstate): Likewise. (create_cd_newstate): Likewise. * posix/regex_internal.h: Fix prototypes of re_acquire_state and re_acquire_state_context. * posix/regexec.c (regexec): Suit it to the error handling of re_search_internal. (re_match): Likewise. (re_search): Likewise. (re_search_internal): Add a check on malloc failure. (acquire_init_state_context): Likewise. (check_matching): Likewise. (proceed_next_node): Likewise. (set_regs): Likewise. (sift_states_backward): Likewise. (sift_states_iter_bkref): Likewise. (add_epsilon_backreference): Likewise. (transit_state): Likewise. (transit_state_sb): Likewise. (transit_state_mb): Likewise. (transit_state_bkref_loop): Likewise. (build_trtable): Likewise. (group_nodes_into_DFAstates): Likewise. (match_ctx_init): Likewise. (match_ctx_add_entry): Likewise.
This commit is contained in:
parent
51f38e87b1
commit
a9388965cc
45
ChangeLog
45
ChangeLog
@ -1,3 +1,48 @@
|
||||
2002-02-28 Isamu Hasegawa <isamu@yamato.ibm.com>
|
||||
|
||||
* posix/regcomp.c (regcomp): Remove a redundant condition.
|
||||
(init_word_char): Add a check on malloc failure.
|
||||
(create_initial_state): Likewise.
|
||||
(duplicate_node): Likewise.
|
||||
(calc_eclosure): Likewise.
|
||||
(calc_eclosure_iter): Likewise.
|
||||
(parse_expression): Likewise.
|
||||
(parse_bracket_exp): Remove unnecessary malloc invocations.
|
||||
(build_equiv_class): Likewise.
|
||||
(build_charclass): Likewise.
|
||||
* posix/regex_internal.c (re_node_set_intersect): Add a check
|
||||
on malloc failure.
|
||||
(re_node_set_add_intersect): Likewise.
|
||||
(re_node_set_merge): Likewise.
|
||||
(re_acquire_state): Likewise.
|
||||
(re_acquire_state_context): Likewise.
|
||||
(create_newstate_common): Likewise.
|
||||
(register_state): Likewise.
|
||||
(create_ci_newstate): Likewise.
|
||||
(create_cd_newstate): Likewise.
|
||||
* posix/regex_internal.h: Fix prototypes of re_acquire_state
|
||||
and re_acquire_state_context.
|
||||
* posix/regexec.c (regexec): Suit it to the error handling of
|
||||
re_search_internal.
|
||||
(re_match): Likewise.
|
||||
(re_search): Likewise.
|
||||
(re_search_internal): Add a check on malloc failure.
|
||||
(acquire_init_state_context): Likewise.
|
||||
(check_matching): Likewise.
|
||||
(proceed_next_node): Likewise.
|
||||
(set_regs): Likewise.
|
||||
(sift_states_backward): Likewise.
|
||||
(sift_states_iter_bkref): Likewise.
|
||||
(add_epsilon_backreference): Likewise.
|
||||
(transit_state): Likewise.
|
||||
(transit_state_sb): Likewise.
|
||||
(transit_state_mb): Likewise.
|
||||
(transit_state_bkref_loop): Likewise.
|
||||
(build_trtable): Likewise.
|
||||
(group_nodes_into_DFAstates): Likewise.
|
||||
(match_ctx_init): Likewise.
|
||||
(match_ctx_add_entry): Likewise.
|
||||
|
||||
2002-02-27 Ulrich Drepper <drepper@redhat.com>
|
||||
|
||||
* elf/dl-load.c (_dl_map_object_from_fd): Always add SONAME to
|
||||
|
214
posix/regcomp.c
214
posix/regcomp.c
@ -63,7 +63,7 @@ static void re_compile_fastmap_iter (regex_t *bufp,
|
||||
const re_dfastate_t *init_state,
|
||||
char *fastmap);
|
||||
static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len);
|
||||
static void init_word_char (re_dfa_t *dfa);
|
||||
static reg_errcode_t init_word_char (re_dfa_t *dfa);
|
||||
static void free_charset (re_charset_t *cset);
|
||||
static void free_workarea_compile (regex_t *preg);
|
||||
static reg_errcode_t create_initial_state (re_dfa_t *dfa);
|
||||
@ -72,10 +72,11 @@ static reg_errcode_t analyze_tree (re_dfa_t *dfa, bin_tree_t *node);
|
||||
static void calc_first (re_dfa_t *dfa, bin_tree_t *node);
|
||||
static void calc_next (re_dfa_t *dfa, bin_tree_t *node);
|
||||
static void calc_epsdest (re_dfa_t *dfa, bin_tree_t *node);
|
||||
static int duplicate_node (re_dfa_t *dfa, int org_idx,
|
||||
static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx,
|
||||
unsigned int constraint);
|
||||
static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
|
||||
static re_node_set calc_eclosure_iter (re_dfa_t *dfa, int node, int root);
|
||||
static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
|
||||
int node, int root);
|
||||
static void calc_inveclosure (re_dfa_t *dfa);
|
||||
static int fetch_number (re_string_t *input, re_token_t *token,
|
||||
reg_syntax_t syntax);
|
||||
@ -446,8 +447,8 @@ regcomp (preg, pattern, cflags)
|
||||
if (ret == REG_ERPAREN)
|
||||
ret = REG_EPAREN;
|
||||
|
||||
/* XXX Why the test for preg->fastmap != NULL? */
|
||||
if (ret == REG_NOERROR && preg->fastmap != NULL)
|
||||
/* We have already checked preg->fastmap != NULL. */
|
||||
if (ret == REG_NOERROR)
|
||||
{
|
||||
/* Compute the fastmap now, since regexec cannot modify the pattern
|
||||
buffer. */
|
||||
@ -772,16 +773,19 @@ init_dfa (dfa, pat_len)
|
||||
"word". In this case "word" means that it is the word construction
|
||||
character used by some operators like "\<", "\>", etc. */
|
||||
|
||||
static void
|
||||
static reg_errcode_t
|
||||
init_word_char (dfa)
|
||||
re_dfa_t *dfa;
|
||||
{
|
||||
int i, j, ch;
|
||||
dfa->word_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1);
|
||||
if (dfa->word_char == NULL)
|
||||
return REG_ESPACE;
|
||||
for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
|
||||
for (j = 0; j < UINT_BITS; ++j, ++ch)
|
||||
if (isalnum (ch) || ch == '_')
|
||||
dfa->word_char[i] |= 1 << j;
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
||||
/* Free the work area which are only used while compiling. */
|
||||
@ -844,24 +848,28 @@ create_initial_state (dfa)
|
||||
}
|
||||
|
||||
/* It must be the first time to invoke acquire_state. */
|
||||
dfa->init_state = re_acquire_state_context (dfa, &init_nodes, 0);
|
||||
dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
|
||||
/* We don't check ERR here, since the initial state must not be NULL. */
|
||||
if (dfa->init_state == NULL)
|
||||
return err;
|
||||
if (dfa->init_state->has_constraint)
|
||||
{
|
||||
dfa->init_state_word = re_acquire_state_context (dfa, &init_nodes,
|
||||
dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
|
||||
CONTEXT_WORD);
|
||||
dfa->init_state_nl = re_acquire_state_context (dfa, &init_nodes,
|
||||
dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
|
||||
CONTEXT_NEWLINE);
|
||||
dfa->init_state_begbuf = re_acquire_state_context (dfa, &init_nodes,
|
||||
dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
|
||||
&init_nodes,
|
||||
CONTEXT_NEWLINE
|
||||
| CONTEXT_BEGBUF);
|
||||
if (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
|
||||
|| dfa->init_state_begbuf == NULL)
|
||||
return err;
|
||||
}
|
||||
else
|
||||
dfa->init_state_word = dfa->init_state_nl
|
||||
= dfa->init_state_begbuf = dfa->init_state;
|
||||
|
||||
if (dfa->init_state == NULL || dfa->init_state_word == NULL
|
||||
|| dfa->init_state_nl == NULL || dfa->init_state_begbuf == NULL )
|
||||
return REG_ESPACE;
|
||||
re_node_set_free (&init_nodes);
|
||||
return REG_NOERROR;
|
||||
}
|
||||
@ -1114,20 +1122,30 @@ calc_epsdest (dfa, node)
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
duplicate_node (dfa, org_idx, constraint)
|
||||
/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
|
||||
The new index will be stored in NEW_IDX and return REG_NOERROR if succeeded,
|
||||
otherwise return the error code. */
|
||||
|
||||
static reg_errcode_t
|
||||
duplicate_node (new_idx, dfa, org_idx, constraint)
|
||||
re_dfa_t *dfa;
|
||||
int org_idx;
|
||||
int *new_idx, org_idx;
|
||||
unsigned int constraint;
|
||||
{
|
||||
re_token_t dup;
|
||||
int dup_idx;
|
||||
reg_errcode_t err;
|
||||
|
||||
dup.type = OP_CONTEXT_NODE;
|
||||
if (dfa->nodes[org_idx].type == OP_CONTEXT_NODE)
|
||||
{
|
||||
/* If the node whose index is ORG_IDX is the same as the intended
|
||||
node, use it. */
|
||||
if (dfa->nodes[org_idx].constraint == constraint)
|
||||
return org_idx;
|
||||
{
|
||||
*new_idx = org_idx;
|
||||
return REG_NOERROR;
|
||||
}
|
||||
dup.constraint = constraint |
|
||||
dfa->nodes[org_idx].constraint;
|
||||
}
|
||||
@ -1137,23 +1155,32 @@ duplicate_node (dfa, org_idx, constraint)
|
||||
/* In case that `entity' points OP_CONTEXT_NODE,
|
||||
we correct `entity' to real entity in calc_inveclosures(). */
|
||||
dup.opr.ctx_info = malloc (sizeof (*dup.opr.ctx_info));
|
||||
dup_idx = re_dfa_add_node (dfa, dup, 1);
|
||||
if (dup.opr.ctx_info == NULL || dup_idx == -1)
|
||||
return REG_ESPACE;
|
||||
dup.opr.ctx_info->entity = org_idx;
|
||||
dup.opr.ctx_info->bkref_eclosure = NULL;
|
||||
dup_idx = re_dfa_add_node (dfa, dup, 1);
|
||||
dfa->nodes[dup_idx].duplicated = 1;
|
||||
|
||||
dfa->nodes[dup_idx].duplicated = 1;
|
||||
dfa->firsts[dup_idx] = dfa->firsts[org_idx];
|
||||
dfa->nexts[dup_idx] = dfa->nexts[org_idx];
|
||||
re_node_set_init_copy (dfa->edests + dup_idx, dfa->edests + org_idx);
|
||||
err = re_node_set_init_copy (dfa->edests + dup_idx, dfa->edests + org_idx);
|
||||
if (err != REG_NOERROR)
|
||||
return err;
|
||||
/* Since we don't duplicate epsilon nodes, epsilon closure have
|
||||
only itself. */
|
||||
re_node_set_init_1 (dfa->eclosures + dup_idx, dup_idx);
|
||||
re_node_set_init_1 (dfa->inveclosures + dup_idx, dup_idx);
|
||||
err = re_node_set_init_1 (dfa->eclosures + dup_idx, dup_idx);
|
||||
if (err != REG_NOERROR)
|
||||
return err;
|
||||
err = re_node_set_init_1 (dfa->inveclosures + dup_idx, dup_idx);
|
||||
if (err != REG_NOERROR)
|
||||
return err;
|
||||
/* Then we must update inveclosure for this node.
|
||||
We process them at last part of calc_eclosure(),
|
||||
since we don't complete to calculate them here. */
|
||||
|
||||
return dup_idx;
|
||||
*new_idx = dup_idx;
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
||||
static void
|
||||
@ -1193,6 +1220,7 @@ calc_eclosure (dfa)
|
||||
/* For each nodes, calculate epsilon closure. */
|
||||
for (node_idx = 0, max = dfa->nodes_len; ; ++node_idx)
|
||||
{
|
||||
reg_errcode_t err;
|
||||
re_node_set eclosure_elem;
|
||||
if (node_idx == max)
|
||||
{
|
||||
@ -1210,7 +1238,9 @@ calc_eclosure (dfa)
|
||||
if (dfa->eclosures[node_idx].nelem != 0)
|
||||
continue;
|
||||
/* Calculate epsilon closure of `node_idx'. */
|
||||
eclosure_elem = calc_eclosure_iter (dfa, node_idx, 1);
|
||||
err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
|
||||
if (err != REG_NOERROR)
|
||||
return err;
|
||||
|
||||
if (dfa->eclosures[node_idx].nelem == 0)
|
||||
{
|
||||
@ -1241,7 +1271,13 @@ calc_eclosure (dfa)
|
||||
{
|
||||
int dest_node_idx = dfa->eclosures[dfa->nexts[idx]].elems[i];
|
||||
if (!IS_EPSILON_NODE (dfa->nodes[dest_node_idx].type))
|
||||
dest_node_idx = duplicate_node (dfa, dest_node_idx, constraint);
|
||||
{
|
||||
reg_errcode_t err;
|
||||
err = duplicate_node (&dest_node_idx, dfa, dest_node_idx,
|
||||
constraint);
|
||||
if (err != REG_NOERROR)
|
||||
return err;
|
||||
}
|
||||
re_node_set_insert (bkref_eclosure, dest_node_idx);
|
||||
}
|
||||
dfa->nodes[idx].opr.ctx_info->bkref_eclosure = bkref_eclosure;
|
||||
@ -1252,15 +1288,19 @@ calc_eclosure (dfa)
|
||||
|
||||
/* Calculate epsilon closure of NODE. */
|
||||
|
||||
static re_node_set
|
||||
calc_eclosure_iter (dfa, node, root)
|
||||
static reg_errcode_t
|
||||
calc_eclosure_iter (new_set, dfa, node, root)
|
||||
re_node_set *new_set;
|
||||
re_dfa_t *dfa;
|
||||
int node, root;
|
||||
{
|
||||
reg_errcode_t err;
|
||||
unsigned int constraint;
|
||||
int i, max, incomplete = 0;
|
||||
re_node_set eclosure;
|
||||
re_node_set_alloc (&eclosure, 1);
|
||||
err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
|
||||
if (err != REG_NOERROR)
|
||||
return err;
|
||||
|
||||
/* This indicates that we are calculating this node now.
|
||||
We reference this value to avoid infinite loop. */
|
||||
@ -1285,7 +1325,11 @@ calc_eclosure_iter (dfa, node, root)
|
||||
/* If we haven't calculated the epsilon closure of `edest' yet,
|
||||
calculate now. Otherwise use calculated epsilon closure. */
|
||||
if (dfa->eclosures[edest].nelem == 0)
|
||||
eclosure_elem = calc_eclosure_iter (dfa, edest, 0);
|
||||
{
|
||||
err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
|
||||
if (err != REG_NOERROR)
|
||||
return err;
|
||||
}
|
||||
else
|
||||
eclosure_elem = dfa->eclosures[edest];
|
||||
/* Merge the epsilon closure of `edest'. */
|
||||
@ -1307,7 +1351,11 @@ calc_eclosure_iter (dfa, node, root)
|
||||
int dest = eclosure.elems[i];
|
||||
if (!IS_EPSILON_NODE (dfa->nodes[dest].type))
|
||||
{
|
||||
int dup_dest = duplicate_node (dfa, dest, constraint);
|
||||
int dup_dest;
|
||||
reg_errcode_t err;
|
||||
err = duplicate_node (&dup_dest, dfa, dest, constraint);
|
||||
if (err != REG_NOERROR)
|
||||
return err;
|
||||
if (dest != dup_dest)
|
||||
{
|
||||
re_node_set_remove_at (&eclosure, i--);
|
||||
@ -1323,7 +1371,8 @@ calc_eclosure_iter (dfa, node, root)
|
||||
dfa->eclosures[node].nelem = 0;
|
||||
else
|
||||
dfa->eclosures[node] = eclosure;
|
||||
return eclosure;
|
||||
*new_set = eclosure;
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
||||
/* Functions for token which are used in the parser. */
|
||||
@ -1865,7 +1914,11 @@ parse_expression (regexp, preg, token, syntax, nest, err)
|
||||
break;
|
||||
case ANCHOR:
|
||||
if (dfa->word_char == NULL)
|
||||
init_word_char (dfa);
|
||||
{
|
||||
*err = init_word_char (dfa);
|
||||
if (*err != REG_NOERROR)
|
||||
return NULL;
|
||||
}
|
||||
if (token->opr.ctx_type == WORD_DELIM)
|
||||
{
|
||||
bin_tree_t *tree_first, *tree_last;
|
||||
@ -2137,28 +2190,6 @@ parse_dup_op (dup_elem, regexp, dfa, token, syntax, err)
|
||||
I'm not sure, but maybe enough. */
|
||||
#define BRACKET_NAME_BUF_SIZE 32
|
||||
|
||||
static inline void *
|
||||
extend_array_for_cset (array, num, alloc, type_size)
|
||||
void *array;
|
||||
int num, *alloc, type_size;
|
||||
{
|
||||
void *new_array = array;
|
||||
if (*alloc == num)
|
||||
{
|
||||
if (*alloc == 0)
|
||||
{
|
||||
new_array = malloc (type_size);
|
||||
*alloc = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
new_array = realloc (array, type_size * num * 2);
|
||||
*alloc = 2 * num;
|
||||
}
|
||||
}
|
||||
return new_array;
|
||||
}
|
||||
|
||||
/* This function parse bracket expression like "[abc]", "[a-c]",
|
||||
"[[.a-a.]]" etc. */
|
||||
|
||||
@ -2299,22 +2330,15 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
|
||||
uint32_t *new_array_end;
|
||||
int new_nranges;
|
||||
|
||||
/* XXX If mbcset->range_starts and mbcset->range_ends are NULL
|
||||
if *range_alloc == 0 then we do not need the if. */
|
||||
if (*range_alloc == 0)
|
||||
{
|
||||
new_nranges = 1;
|
||||
new_array_start = re_malloc (uint32_t, 1);
|
||||
new_array_end = re_malloc (uint32_t, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
new_nranges = 2 * mbcset->nranges;
|
||||
/* +1 in case of mbcset->nranges is 0. */
|
||||
new_nranges = 2 * mbcset->nranges + 1;
|
||||
/* Use realloc since mbcset->range_starts and mbcset->range_ends
|
||||
are NULL if *range_alloc == 0. */
|
||||
new_array_start = re_realloc (mbcset->range_starts, uint32_t,
|
||||
new_nranges);
|
||||
new_array_end = re_realloc (mbcset->range_ends, uint32_t,
|
||||
new_nranges);
|
||||
}
|
||||
|
||||
if (new_array_start == NULL || new_array_end == NULL)
|
||||
return REG_ESPACE;
|
||||
|
||||
@ -2394,13 +2418,18 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
|
||||
|
||||
/* Got valid collation sequence, add it as a new entry. */
|
||||
/* Check the space of the arrays. */
|
||||
mbcset->coll_syms = extend_array_for_cset (mbcset->coll_syms,
|
||||
mbcset->ncoll_syms,
|
||||
coll_sym_alloc,
|
||||
sizeof (int32_t));
|
||||
if (*coll_sym_alloc == mbcset->ncoll_syms)
|
||||
{
|
||||
/* Not enough, realloc it. */
|
||||
/* +1 in case of mbcset->ncoll_syms is 0. */
|
||||
*coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
|
||||
/* Use realloc since mbcset->coll_syms is NULL
|
||||
if *alloc == 0. */
|
||||
mbcset->coll_syms = re_realloc (mbcset->coll_syms, int32_t,
|
||||
*coll_sym_alloc);
|
||||
if (mbcset->coll_syms == NULL)
|
||||
return REG_ESPACE;
|
||||
|
||||
}
|
||||
mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
|
||||
return REG_NOERROR;
|
||||
}
|
||||
@ -2557,12 +2586,18 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
|
||||
bitset_set (sbcset, start_elem.opr.ch);
|
||||
break;
|
||||
case MB_CHAR:
|
||||
mbcset->mbchars = extend_array_for_cset (mbcset->mbchars,
|
||||
mbcset->nmbchars,
|
||||
&mbchar_alloc,
|
||||
sizeof (wchar_t));
|
||||
/* Check whether the array has enough space. */
|
||||
if (mbchar_alloc == mbcset->nmbchars)
|
||||
{
|
||||
/* Not enough, realloc it. */
|
||||
/* +1 in case of mbcset->nmbchars is 0. */
|
||||
mbchar_alloc = 2 * mbcset->nmbchars + 1;
|
||||
/* Use realloc since array is NULL if *alloc == 0. */
|
||||
mbcset->mbchars = re_realloc (mbcset->mbchars, wchar_t,
|
||||
mbchar_alloc);
|
||||
if (mbcset->mbchars == NULL)
|
||||
goto parse_bracket_exp_espace;
|
||||
}
|
||||
mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
|
||||
break;
|
||||
case EQUIV_CLASS:
|
||||
@ -2779,14 +2814,18 @@ build_equiv_class (mbcset, sbcset, equiv_class_alloc, name)
|
||||
bitset_set (sbcset, ch);
|
||||
}
|
||||
}
|
||||
/* Check the space of the arrays, and extend if we need. */
|
||||
mbcset->equiv_classes = extend_array_for_cset (mbcset->equiv_classes,
|
||||
mbcset->nequiv_classes,
|
||||
equiv_class_alloc,
|
||||
sizeof (int32_t));
|
||||
/* Check whether the array has enough space. */
|
||||
if (*equiv_class_alloc == mbcset->nequiv_classes)
|
||||
{
|
||||
/* Not enough, realloc it. */
|
||||
/* +1 in case of mbcset->nequiv_classes is 0. */
|
||||
*equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
|
||||
/* Use realloc since the array is NULL if *alloc == 0. */
|
||||
mbcset->equiv_classes = re_realloc (mbcset->equiv_classes, int32_t,
|
||||
*equiv_class_alloc);
|
||||
if (mbcset->equiv_classes == NULL)
|
||||
return REG_ESPACE;
|
||||
|
||||
}
|
||||
mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
|
||||
}
|
||||
else
|
||||
@ -2815,12 +2854,17 @@ build_charclass (mbcset, sbcset, char_class_alloc, name)
|
||||
int i;
|
||||
|
||||
/* Check the space of the arrays. */
|
||||
mbcset->char_classes = extend_array_for_cset (mbcset->char_classes,
|
||||
mbcset->nchar_classes,
|
||||
char_class_alloc,
|
||||
sizeof (wctype_t));
|
||||
if (*char_class_alloc == mbcset->nchar_classes)
|
||||
{
|
||||
/* Not enough, realloc it. */
|
||||
/* +1 in case of mbcset->nchar_classes is 0. */
|
||||
*char_class_alloc = 2 * mbcset->nchar_classes + 1;
|
||||
/* Use realloc since array is NULL if *alloc == 0. */
|
||||
mbcset->char_classes = re_realloc (mbcset->char_classes, wctype_t,
|
||||
*char_class_alloc);
|
||||
if (mbcset->char_classes == NULL)
|
||||
return REG_ESPACE;
|
||||
}
|
||||
|
||||
mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
|
||||
|
||||
|
@ -68,6 +68,8 @@ static reg_errcode_t re_string_translate_buffer (re_string_t *pstr,
|
||||
static re_dfastate_t *create_newstate_common (re_dfa_t *dfa,
|
||||
const re_node_set *nodes,
|
||||
unsigned int hash);
|
||||
static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate,
|
||||
unsigned int hash);
|
||||
static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa,
|
||||
const re_node_set *nodes,
|
||||
unsigned int hash);
|
||||
@ -473,6 +475,10 @@ re_node_set_init_copy (dest, src)
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
||||
/* Calculate the intersection of the sets SRC1 and SRC2. And store it in
|
||||
DEST. Return value indicate the error code or REG_NOERROR if succeeded.
|
||||
Note: We assume dest->elems is NULL, when dest->alloc is 0. */
|
||||
|
||||
static reg_errcode_t
|
||||
re_node_set_intersect (dest, src1, src2)
|
||||
re_node_set *dest;
|
||||
@ -483,31 +489,28 @@ re_node_set_intersect (dest, src1, src2)
|
||||
{
|
||||
if (src1->nelem + src2->nelem > dest->alloc)
|
||||
{
|
||||
int *new_array;
|
||||
if (dest->alloc == 0)
|
||||
new_array = re_malloc (int, src1->nelem + src2->nelem);
|
||||
else
|
||||
new_array = re_realloc (dest->elems, int,
|
||||
src1->nelem + src2->nelem);
|
||||
dest->alloc = src1->nelem + src2->nelem;
|
||||
if (new_array == NULL)
|
||||
dest->elems = re_realloc (dest->elems, int, dest->alloc);
|
||||
if (dest->elems == NULL)
|
||||
return REG_ESPACE;
|
||||
dest->elems = new_array;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* The intersection of empty sets is also empty set. */
|
||||
dest->nelem = 0;
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
||||
for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
|
||||
for (i1 = i2 = id = 0; i1 < src1->nelem && i2 < src2->nelem; )
|
||||
{
|
||||
if (src1->elems[i1] > src2->elems[i2])
|
||||
{
|
||||
++i2;
|
||||
continue;
|
||||
}
|
||||
/* The intersection must have the elements which are in both of
|
||||
SRC1 and SRC2. */
|
||||
if (src1->elems[i1] == src2->elems[i2])
|
||||
dest->elems[id++] = src2->elems[i2++];
|
||||
++i1;
|
||||
@ -516,6 +519,10 @@ re_node_set_intersect (dest, src1, src2)
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
||||
/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
|
||||
DEST. Return value indicate the error code or REG_NOERROR if succeeded.
|
||||
Note: We assume dest->elems is NULL, when dest->alloc is 0. */
|
||||
|
||||
static reg_errcode_t
|
||||
re_node_set_add_intersect (dest, src1, src2)
|
||||
re_node_set *dest;
|
||||
@ -526,16 +533,10 @@ re_node_set_add_intersect (dest, src1, src2)
|
||||
{
|
||||
if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
|
||||
{
|
||||
int *new_array;
|
||||
if (dest->alloc == 0)
|
||||
new_array = re_malloc (int, src1->nelem + src2->nelem);
|
||||
else
|
||||
new_array = re_realloc (dest->elems, int,
|
||||
src1->nelem + src2->nelem + dest->nelem);
|
||||
dest->alloc = src1->nelem + src2->nelem + dest->nelem;
|
||||
if (new_array == NULL)
|
||||
dest->elems = re_realloc (dest->elems, int, dest->alloc);
|
||||
if (dest->elems == NULL)
|
||||
return REG_ESPACE;
|
||||
dest->elems = new_array;
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -567,6 +568,9 @@ re_node_set_add_intersect (dest, src1, src2)
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
||||
/* Calculate the union set of the sets SRC1 and SRC2. And store it to
|
||||
DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
|
||||
|
||||
static reg_errcode_t
|
||||
re_node_set_init_union (dest, src1, src2)
|
||||
re_node_set *dest;
|
||||
@ -617,6 +621,9 @@ re_node_set_init_union (dest, src1, src2)
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
||||
/* Calculate the union set of the sets DEST and SRC. And store it to
|
||||
DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
|
||||
|
||||
static reg_errcode_t
|
||||
re_node_set_merge (dest, src)
|
||||
re_node_set *dest;
|
||||
@ -628,12 +635,16 @@ re_node_set_merge (dest, src)
|
||||
else if (dest == NULL)
|
||||
{
|
||||
dest = re_malloc (re_node_set, 1);
|
||||
if (dest == NULL)
|
||||
return REG_ESPACE;
|
||||
return re_node_set_init_copy (dest, src);
|
||||
}
|
||||
if (dest->alloc < src->nelem + dest->nelem)
|
||||
{
|
||||
dest->alloc = 2 * (src->nelem + dest->alloc);
|
||||
dest->elems = re_realloc (dest->elems, int, dest->alloc);
|
||||
if (dest->elems == NULL)
|
||||
return REG_ESPACE;
|
||||
}
|
||||
|
||||
for (si = 0, di = 0 ; si < src->nelem && di < dest->nelem ;)
|
||||
@ -860,18 +871,28 @@ calc_state_hash (nodes, context)
|
||||
|
||||
/* Search for the state whose node_set is equivalent to NODES.
|
||||
Return the pointer to the state, if we found it in the DFA.
|
||||
Otherwise create the new one and return it. */
|
||||
Otherwise create the new one and return it. In case of an error
|
||||
return NULL and set the error code in ERR.
|
||||
Note: - We assume NULL as the invalid state, then it is possible that
|
||||
return value is NULL and ERR is REG_NOERROR.
|
||||
- We never return non-NULL value in case of any errors, it is for
|
||||
optimization. */
|
||||
|
||||
static re_dfastate_t *
|
||||
re_acquire_state (dfa, nodes)
|
||||
static re_dfastate_t*
|
||||
re_acquire_state (err, dfa, nodes)
|
||||
reg_errcode_t *err;
|
||||
re_dfa_t *dfa;
|
||||
const re_node_set *nodes;
|
||||
{
|
||||
unsigned int hash;
|
||||
re_dfastate_t *new_state;
|
||||
struct re_state_table_entry *spot;
|
||||
int i;
|
||||
if (nodes->nelem == 0)
|
||||
{
|
||||
*err = REG_NOERROR;
|
||||
return NULL;
|
||||
}
|
||||
hash = calc_state_hash (nodes, 0);
|
||||
spot = dfa->state_table + (hash & dfa->state_hash_mask);
|
||||
|
||||
@ -893,25 +914,42 @@ re_acquire_state (dfa, nodes)
|
||||
}
|
||||
|
||||
/* There are no appropriate state in the dfa, create the new one. */
|
||||
return create_ci_newstate (dfa, nodes, hash);
|
||||
new_state = create_ci_newstate (dfa, nodes, hash);
|
||||
if (new_state != NULL)
|
||||
return new_state;
|
||||
else
|
||||
{
|
||||
*err = REG_ESPACE;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Search for the state whose node_set is equivalent to NODES and
|
||||
whose context is equivalent to CONTEXT.
|
||||
Return the pointer to the state, if we found it in the DFA.
|
||||
Otherwise create the new one and return it. */
|
||||
Otherwise create the new one and return it. In case of an error
|
||||
return NULL and set the error code in ERR.
|
||||
Note: - We assume NULL as the invalid state, then it is possible that
|
||||
return value is NULL and ERR is REG_NOERROR.
|
||||
- We never return non-NULL value in case of any errors, it is for
|
||||
optimization. */
|
||||
|
||||
static re_dfastate_t *
|
||||
re_acquire_state_context (dfa, nodes, context)
|
||||
static re_dfastate_t*
|
||||
re_acquire_state_context (err, dfa, nodes, context)
|
||||
reg_errcode_t *err;
|
||||
re_dfa_t *dfa;
|
||||
const re_node_set *nodes;
|
||||
unsigned int context;
|
||||
{
|
||||
unsigned int hash;
|
||||
re_dfastate_t *new_state;
|
||||
struct re_state_table_entry *spot;
|
||||
int i;
|
||||
if (nodes->nelem == 0)
|
||||
{
|
||||
*err = REG_NOERROR;
|
||||
return NULL;
|
||||
}
|
||||
hash = calc_state_hash (nodes, context);
|
||||
spot = dfa->state_table + (hash & dfa->state_hash_mask);
|
||||
|
||||
@ -934,9 +972,19 @@ re_acquire_state_context (dfa, nodes, context)
|
||||
return state;
|
||||
}
|
||||
/* There are no appropriate state in `dfa', create the new one. */
|
||||
return create_cd_newstate (dfa, nodes, context, hash);
|
||||
new_state = create_cd_newstate (dfa, nodes, context, hash);
|
||||
if (new_state != NULL)
|
||||
return new_state;
|
||||
else
|
||||
{
|
||||
*err = REG_ESPACE;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocate memory for DFA state and initialize common properties.
|
||||
Return the new state if succeeded, otherwise return NULL. */
|
||||
|
||||
static re_dfastate_t *
|
||||
create_newstate_common (dfa, nodes, hash)
|
||||
re_dfa_t *dfa;
|
||||
@ -945,6 +993,8 @@ create_newstate_common (dfa, nodes, hash)
|
||||
{
|
||||
re_dfastate_t *newstate;
|
||||
newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
|
||||
if (newstate == NULL)
|
||||
return NULL;
|
||||
re_node_set_init_copy (&newstate->nodes, nodes);
|
||||
newstate->trtable = NULL;
|
||||
newstate->trtable_search = NULL;
|
||||
@ -952,7 +1002,10 @@ create_newstate_common (dfa, nodes, hash)
|
||||
return newstate;
|
||||
}
|
||||
|
||||
static void
|
||||
/* Store the new state NEWSTATE whose hash value is HASH in appropriate
|
||||
position. Return value indicate the error code if failed. */
|
||||
|
||||
static reg_errcode_t
|
||||
register_state (dfa, newstate, hash)
|
||||
re_dfa_t *dfa;
|
||||
re_dfastate_t *newstate;
|
||||
@ -972,8 +1025,7 @@ register_state (dfa, newstate, hash)
|
||||
spot->alloc = 4;
|
||||
new_array = re_malloc (re_dfastate_t *, spot->alloc);
|
||||
if (new_array == NULL)
|
||||
/* XXX return value */
|
||||
return;
|
||||
return REG_ESPACE;
|
||||
new_array[0] = spot->entry.state;
|
||||
}
|
||||
else
|
||||
@ -985,8 +1037,12 @@ register_state (dfa, newstate, hash)
|
||||
spot->entry.array = new_array;
|
||||
}
|
||||
spot->entry.array[spot->num++] = newstate;
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
||||
/* Create the new state which is independ of contexts.
|
||||
Return the new state if succeeded, otherwise return NULL. */
|
||||
|
||||
static re_dfastate_t *
|
||||
create_ci_newstate (dfa, nodes, hash)
|
||||
re_dfa_t *dfa;
|
||||
@ -994,8 +1050,11 @@ create_ci_newstate (dfa, nodes, hash)
|
||||
unsigned int hash;
|
||||
{
|
||||
int i;
|
||||
reg_errcode_t err;
|
||||
re_dfastate_t *newstate;
|
||||
newstate = create_newstate_common (dfa, nodes, hash);
|
||||
if (newstate == NULL)
|
||||
return NULL;
|
||||
newstate->entrance_nodes = &newstate->nodes;
|
||||
|
||||
for (i = 0 ; i < nodes->nelem ; i++)
|
||||
@ -1021,11 +1080,13 @@ create_ci_newstate (dfa, nodes, hash)
|
||||
newstate->halt = 1;
|
||||
}
|
||||
}
|
||||
|
||||
register_state (dfa, newstate, hash);
|
||||
return newstate;
|
||||
err = register_state (dfa, newstate, hash);
|
||||
return (err != REG_NOERROR) ? NULL : newstate;
|
||||
}
|
||||
|
||||
/* Create the new state which is depend on the context CONTEXT.
|
||||
Return the new state if succeeded, otherwise return NULL. */
|
||||
|
||||
static re_dfastate_t *
|
||||
create_cd_newstate (dfa, nodes, context, hash)
|
||||
re_dfa_t *dfa;
|
||||
@ -1033,9 +1094,12 @@ create_cd_newstate (dfa, nodes, context, hash)
|
||||
unsigned int context, hash;
|
||||
{
|
||||
int i, nctx_nodes = 0;
|
||||
reg_errcode_t err;
|
||||
re_dfastate_t *newstate;
|
||||
|
||||
newstate = create_newstate_common (dfa, nodes, hash);
|
||||
if (newstate == NULL)
|
||||
return NULL;
|
||||
newstate->context = context;
|
||||
newstate->entrance_nodes = &newstate->nodes;
|
||||
|
||||
@ -1076,7 +1140,6 @@ create_cd_newstate (dfa, nodes, context, hash)
|
||||
{
|
||||
newstate->entrance_nodes = re_malloc (re_node_set, 1);
|
||||
if (newstate->entrance_nodes == NULL)
|
||||
/* XXX Return which value? */
|
||||
return NULL;
|
||||
re_node_set_init_copy (newstate->entrance_nodes, nodes);
|
||||
nctx_nodes = 0;
|
||||
@ -1090,6 +1153,6 @@ create_cd_newstate (dfa, nodes, context, hash)
|
||||
}
|
||||
}
|
||||
}
|
||||
register_state (dfa, newstate, hash);
|
||||
return newstate;
|
||||
err = register_state (dfa, newstate, hash);
|
||||
return (err != REG_NOERROR) ? NULL : newstate;
|
||||
}
|
||||
|
@ -426,9 +426,10 @@ static void re_node_set_remove_at (re_node_set *set, int idx);
|
||||
#define re_node_set_empty(p) ((p)->nelem = 0)
|
||||
#define re_node_set_free(set) re_free ((set)->elems)
|
||||
static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token, int mode);
|
||||
static re_dfastate_t *re_acquire_state (re_dfa_t *dfa,
|
||||
static re_dfastate_t *re_acquire_state (reg_errcode_t *err, re_dfa_t *dfa,
|
||||
const re_node_set *nodes);
|
||||
static re_dfastate_t *re_acquire_state_context (re_dfa_t *dfa,
|
||||
static re_dfastate_t *re_acquire_state_context (reg_errcode_t *err,
|
||||
re_dfa_t *dfa,
|
||||
const re_node_set *nodes,
|
||||
unsigned int context);
|
||||
|
||||
|
384
posix/regexec.c
384
posix/regexec.c
@ -38,15 +38,19 @@
|
||||
#include "regex.h"
|
||||
#include "regex_internal.h"
|
||||
|
||||
static void match_ctx_init (re_match_context_t *cache, int eflags, int n);
|
||||
static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
|
||||
int n);
|
||||
static void match_ctx_free (re_match_context_t *cache);
|
||||
static void match_ctx_add_entry (re_match_context_t *cache, int node, int from,
|
||||
int to);
|
||||
static int re_search_internal (const regex_t *preg, const char *string,
|
||||
int length, int start, int range, size_t nmatch,
|
||||
static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
|
||||
int from, int to);
|
||||
static reg_errcode_t re_search_internal (const regex_t *preg,
|
||||
const char *string, int length,
|
||||
int start, int range, size_t nmatch,
|
||||
regmatch_t pmatch[], int eflags);
|
||||
static inline re_dfastate_t *acquire_init_state_context (const regex_t *preg,
|
||||
const re_string_t *input, int idx, int eflags);
|
||||
static inline re_dfastate_t *acquire_init_state_context (reg_errcode_t *err,
|
||||
const regex_t *preg,
|
||||
const re_string_t *input,
|
||||
int idx, int eflags);
|
||||
static int check_matching (const regex_t *preg, re_string_t *input,
|
||||
re_match_context_t *mctx, re_dfastate_t **state_log,
|
||||
int start_idx, int fl_search, int fl_longest_match);
|
||||
@ -61,9 +65,10 @@ static int proceed_next_node (const regex_t *preg,
|
||||
const re_match_context_t *mctx,
|
||||
const re_string_t *input,
|
||||
int *pidx, int node, re_node_set *eps_via_nodes);
|
||||
static void set_regs (const regex_t *preg, re_dfastate_t **state_log,
|
||||
const re_match_context_t *mctx, const re_string_t *input,
|
||||
size_t nmatch, regmatch_t *pmatch, int last);
|
||||
static reg_errcode_t set_regs (const regex_t *preg, re_dfastate_t **state_log,
|
||||
const re_match_context_t *mctx,
|
||||
const re_string_t *input, size_t nmatch,
|
||||
regmatch_t *pmatch, int last);
|
||||
static int sift_states_iter_mb (const regex_t *preg, re_dfastate_t **state_log,
|
||||
const re_match_context_t *mctx,
|
||||
const re_string_t *input, int node_idx,
|
||||
@ -73,31 +78,35 @@ static int sift_states_iter_bkref (const re_dfa_t *dfa,
|
||||
struct re_backref_cache_entry *mctx_entry,
|
||||
int node_idx, int idx, int match_first,
|
||||
int match_last);
|
||||
static void sift_states_backward (const regex_t *preg,
|
||||
static reg_errcode_t sift_states_backward (const regex_t *preg,
|
||||
re_dfastate_t **state_log,
|
||||
const re_match_context_t *mctx,
|
||||
const re_string_t *input, int last_node);
|
||||
static void add_epsilon_backreference (const re_dfa_t *dfa,
|
||||
const re_string_t *input,
|
||||
int last_node);
|
||||
static reg_errcode_t add_epsilon_backreference (const re_dfa_t *dfa,
|
||||
const re_match_context_t *mctx,
|
||||
const re_node_set *plog, int idx,
|
||||
const re_node_set *plog,
|
||||
int idx,
|
||||
re_node_set *state_buf);
|
||||
static re_dfastate_t *transit_state (const regex_t *preg, re_dfastate_t *state,
|
||||
re_string_t *input, int fl_search,
|
||||
re_dfastate_t **state_log,
|
||||
static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg,
|
||||
re_dfastate_t *state, re_string_t *input,
|
||||
int fl_search, re_dfastate_t **state_log,
|
||||
re_match_context_t *mctx);
|
||||
static re_dfastate_t *transit_state_sb (const regex_t *preg,
|
||||
static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg,
|
||||
re_dfastate_t *pstate,
|
||||
re_string_t *input, int fl_search,
|
||||
re_match_context_t *mctx);
|
||||
static void transit_state_mb (const regex_t *preg, re_dfastate_t *pstate,
|
||||
static reg_errcode_t transit_state_mb (const regex_t *preg,
|
||||
re_dfastate_t *pstate,
|
||||
const re_string_t *input,
|
||||
re_dfastate_t **state_log,
|
||||
re_match_context_t *mctx);
|
||||
static void transit_state_bkref (const regex_t *preg, re_dfastate_t *pstate,
|
||||
static reg_errcode_t transit_state_bkref (const regex_t *preg,
|
||||
re_dfastate_t *pstate,
|
||||
const re_string_t *input,
|
||||
re_dfastate_t **state_log,
|
||||
re_match_context_t *mctx);
|
||||
static void transit_state_bkref_loop (const regex_t *preg,
|
||||
static reg_errcode_t transit_state_bkref_loop (const regex_t *preg,
|
||||
const re_string_t *input,
|
||||
re_node_set *nodes,
|
||||
re_dfastate_t **work_state_log,
|
||||
@ -141,13 +150,15 @@ regexec (preg, string, nmatch, pmatch, eflags)
|
||||
regmatch_t pmatch[];
|
||||
int eflags;
|
||||
{
|
||||
reg_errcode_t err;
|
||||
int length = strlen (string);
|
||||
if (preg->no_sub)
|
||||
return re_search_internal (preg, string, length, 0, length, 0,
|
||||
err = re_search_internal (preg, string, length, 0, length, 0,
|
||||
NULL, eflags);
|
||||
else
|
||||
return re_search_internal (preg, string, length, 0, length, nmatch,
|
||||
err = re_search_internal (preg, string, length, 0, length, nmatch,
|
||||
pmatch, eflags);
|
||||
return err != REG_NOERROR;
|
||||
}
|
||||
#ifdef _LIBC
|
||||
weak_alias (__regexec, regexec)
|
||||
@ -164,7 +175,8 @@ re_match (buffer, string, length, start, regs)
|
||||
int length, start;
|
||||
struct re_registers *regs;
|
||||
{
|
||||
int i, nregs, result, rval, eflags = 0;
|
||||
reg_errcode_t result;
|
||||
int i, nregs, rval, eflags = 0;
|
||||
regmatch_t *pmatch;
|
||||
|
||||
eflags |= (buffer->not_bol) ? REG_NOTBOL : 0;
|
||||
@ -238,7 +250,7 @@ re_match (buffer, string, length, start, regs)
|
||||
}
|
||||
}
|
||||
/* Return value is -1 if not match, the length of mathing otherwise. */
|
||||
rval = (result) ? -1 : pmatch[0].rm_eo - pmatch[0].rm_so;
|
||||
rval = (result != REG_NOERROR) ? -1 : pmatch[0].rm_eo - pmatch[0].rm_so;
|
||||
re_free (pmatch);
|
||||
return rval;
|
||||
}
|
||||
@ -290,7 +302,8 @@ re_search (bufp, string, size, startpos, range, regs)
|
||||
int size, startpos, range;
|
||||
struct re_registers *regs;
|
||||
{
|
||||
int i, nregs, result, real_range, rval, eflags = 0;
|
||||
reg_errcode_t result;
|
||||
int i, nregs, real_range, rval, eflags = 0;
|
||||
regmatch_t *pmatch;
|
||||
|
||||
eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
|
||||
@ -376,7 +389,7 @@ re_search (bufp, string, size, startpos, range, regs)
|
||||
}
|
||||
/* Return value is -1 if not match, the position where the mathing starts
|
||||
otherwise. */
|
||||
rval = (result) ? -1 : pmatch[0].rm_so;
|
||||
rval = (result != REG_NOERROR) ? -1 : pmatch[0].rm_so;
|
||||
re_free (pmatch);
|
||||
return rval;
|
||||
}
|
||||
@ -486,11 +499,12 @@ static re_node_set empty_set;
|
||||
length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
|
||||
mingings with regexec. START, and RANGE have the same meanings
|
||||
with re_search.
|
||||
Return 0 if we find a match and REG_NOMATCH if not.
|
||||
Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
|
||||
otherwise return the error code.
|
||||
Note: We assume front end functions already check ranges.
|
||||
(START + RANGE >= 0 && START + RANGE <= LENGTH) */
|
||||
|
||||
static int
|
||||
static reg_errcode_t
|
||||
re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
|
||||
const regex_t *preg;
|
||||
const char *string;
|
||||
@ -498,6 +512,7 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
|
||||
size_t nmatch;
|
||||
regmatch_t pmatch[];
|
||||
{
|
||||
reg_errcode_t err;
|
||||
re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
|
||||
re_string_t input;
|
||||
re_dfastate_t **state_log;
|
||||
@ -510,7 +525,7 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
|
||||
if (preg->used == 0 || dfa->init_state == NULL
|
||||
|| dfa->init_state_word == NULL || dfa->init_state_nl == NULL
|
||||
|| dfa->init_state_begbuf == NULL)
|
||||
return 1;
|
||||
return REG_NOMATCH;
|
||||
|
||||
re_node_set_init_empty (&empty_set);
|
||||
|
||||
@ -522,16 +537,24 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
|
||||
back-reference or a node which can accept multibyte character or
|
||||
multi character collating element. */
|
||||
if (nmatch > 1 || dfa->has_mb_node)
|
||||
{
|
||||
state_log = re_malloc (re_dfastate_t *, length + 1);
|
||||
if (state_log == NULL)
|
||||
return REG_ESPACE;
|
||||
}
|
||||
else
|
||||
state_log = NULL;
|
||||
|
||||
if (preg->syntax & RE_ICASE)
|
||||
re_string_construct_toupper (&input, string, length, preg->translate);
|
||||
err = re_string_construct_toupper (&input, string, length, preg->translate);
|
||||
else
|
||||
re_string_construct (&input, string, length, preg->translate);
|
||||
err = re_string_construct (&input, string, length, preg->translate);
|
||||
if (err != REG_NOERROR)
|
||||
return err;
|
||||
|
||||
match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
|
||||
err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
|
||||
if (err != REG_NOERROR)
|
||||
return err;
|
||||
|
||||
#ifdef DEBUG
|
||||
/* We assume front-end functions already check them. */
|
||||
@ -557,7 +580,12 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
|
||||
match_last = check_matching (preg, &input, &mctx, state_log,
|
||||
match_first, 0, fl_longest_match);
|
||||
if (match_last != -1)
|
||||
break;
|
||||
{
|
||||
if (match_last == -2)
|
||||
return REG_ESPACE;
|
||||
else
|
||||
break; /* We found a matching. */
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Update counter. */
|
||||
@ -598,8 +626,13 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
|
||||
#endif
|
||||
halt_node = check_halt_state_context (preg, pstate, &input,
|
||||
match_last, eflags);
|
||||
sift_states_backward (preg, state_log, &mctx, &input, halt_node);
|
||||
set_regs (preg, state_log, &mctx, &input, nmatch, pmatch, halt_node);
|
||||
err = sift_states_backward (preg, state_log, &mctx, &input, halt_node);
|
||||
if (err != REG_NOERROR)
|
||||
return err;
|
||||
err = set_regs (preg, state_log, &mctx, &input, nmatch, pmatch,
|
||||
halt_node);
|
||||
if (err != REG_NOERROR)
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
@ -607,21 +640,23 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
|
||||
if (dfa->nbackref)
|
||||
match_ctx_free (&mctx);
|
||||
re_string_destruct (&input);
|
||||
return match_last == -1;
|
||||
return (match_last == -1) ? REG_NOMATCH : REG_NOERROR;
|
||||
}
|
||||
|
||||
/* Acquire an initial state.
|
||||
/* Acquire an initial state and return it.
|
||||
We must select appropriate initial state depending on the context,
|
||||
since initial states may have constraints like "\<", "^", etc.. */
|
||||
|
||||
static inline re_dfastate_t *
|
||||
acquire_init_state_context (preg, input, idx, eflags)
|
||||
acquire_init_state_context (err, preg, input, idx, eflags)
|
||||
reg_errcode_t *err;
|
||||
const regex_t *preg;
|
||||
const re_string_t *input;
|
||||
int idx, eflags;
|
||||
{
|
||||
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
|
||||
|
||||
*err = REG_NOERROR;
|
||||
if (dfa->init_state->has_constraint)
|
||||
{
|
||||
unsigned int context;
|
||||
@ -636,9 +671,12 @@ acquire_init_state_context (preg, input, idx, eflags)
|
||||
else if (IS_NEWLINE_CONTEXT (context))
|
||||
return dfa->init_state_nl;
|
||||
else if (IS_BEGBUF_CONTEXT (context))
|
||||
{
|
||||
/* It is relatively rare case, then calculate on demand. */
|
||||
return re_acquire_state_context (dfa, dfa->init_state->entrance_nodes,
|
||||
return re_acquire_state_context (err, dfa,
|
||||
dfa->init_state->entrance_nodes,
|
||||
context);
|
||||
}
|
||||
else
|
||||
/* Must not happen? */
|
||||
return dfa->init_state;
|
||||
@ -648,7 +686,8 @@ acquire_init_state_context (preg, input, idx, eflags)
|
||||
}
|
||||
|
||||
/* Check whether the regular expression match input string INPUT or not,
|
||||
and return the index where the matching end, or return -1 if not match.
|
||||
and return the index where the matching end, return -1 if not match,
|
||||
or return -2 in case of an error.
|
||||
FL_SEARCH means we must search where the matching starts,
|
||||
FL_LONGEST_MATCH means we want the POSIX longest matching. */
|
||||
|
||||
@ -661,11 +700,15 @@ check_matching (preg, input, mctx, state_log, start_idx, fl_search,
|
||||
re_dfastate_t **state_log;
|
||||
int start_idx, fl_search, fl_longest_match;
|
||||
{
|
||||
reg_errcode_t err;
|
||||
int match = 0, match_last = -1;
|
||||
re_dfastate_t *cur_state;
|
||||
|
||||
cur_state = acquire_init_state_context (preg, input, start_idx,
|
||||
cur_state = acquire_init_state_context (&err, preg, input, start_idx,
|
||||
mctx->eflags);
|
||||
/* An initial state must not be NULL(invalid state). */
|
||||
if (cur_state == NULL)
|
||||
return -2;
|
||||
if (state_log != NULL)
|
||||
state_log[start_idx] = cur_state;
|
||||
/* If the RE accepts NULL string. */
|
||||
@ -687,11 +730,13 @@ check_matching (preg, input, mctx, state_log, start_idx, fl_search,
|
||||
|
||||
while (!re_string_eoi (input))
|
||||
{
|
||||
cur_state = transit_state (preg, cur_state, input, fl_search && !match,
|
||||
state_log, mctx);
|
||||
if (cur_state == NULL) /* Reached at the invalid state. */
|
||||
cur_state = transit_state (&err, preg, cur_state, input,
|
||||
fl_search && !match, state_log, mctx);
|
||||
if (cur_state == NULL) /* Reached at the invalid state or an error. */
|
||||
{
|
||||
int cur_str_idx = re_string_cur_idx (input);
|
||||
if (err != REG_NOERROR)
|
||||
return -2;
|
||||
if (fl_search && !match)
|
||||
{
|
||||
/* Restart from initial state, since we are searching
|
||||
@ -699,9 +744,11 @@ check_matching (preg, input, mctx, state_log, start_idx, fl_search,
|
||||
#ifdef RE_ENABLE_I18N
|
||||
if (MB_CUR_MAX == 1 || re_string_first_byte (input, cur_str_idx))
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
cur_state = acquire_init_state_context (preg, input,
|
||||
cur_state = acquire_init_state_context (&err, preg, input,
|
||||
cur_str_idx,
|
||||
mctx->eflags);
|
||||
if (cur_state == NULL && err != REG_NOERROR)
|
||||
return -2;
|
||||
if (state_log != NULL)
|
||||
state_log[cur_str_idx] = cur_state;
|
||||
}
|
||||
@ -787,9 +834,10 @@ check_halt_state_context (preg, state, input, idx, eflags)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Compute the next node to which "NFA" transit from NODE.
|
||||
Return the destination node, and update EPS_VIA_NODES.
|
||||
("NFA" is a NFA corresponding to the DFA. */
|
||||
/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
|
||||
corresponding to the DFA).
|
||||
Return the destination node, and update EPS_VIA_NODES, return -1 in case
|
||||
of errors. */
|
||||
|
||||
static int
|
||||
proceed_next_node (preg, state_log, mctx, input, pidx, node, eps_via_nodes)
|
||||
@ -801,10 +849,12 @@ proceed_next_node (preg, state_log, mctx, input, pidx, node, eps_via_nodes)
|
||||
re_node_set *eps_via_nodes;
|
||||
{
|
||||
re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
|
||||
int i, dest_node = -1;
|
||||
int i, dest_node = -1, err;
|
||||
if (IS_EPSILON_NODE (dfa->nodes[node].type))
|
||||
{
|
||||
re_node_set_insert (eps_via_nodes, node);
|
||||
err = re_node_set_insert (eps_via_nodes, node);
|
||||
if (err < 0)
|
||||
return -1;
|
||||
for (i = 0; i < state_log[*pidx]->nodes.nelem; ++i)
|
||||
{
|
||||
int candidate = state_log[*pidx]->nodes.elems[i];
|
||||
@ -845,7 +895,9 @@ proceed_next_node (preg, state_log, mctx, input, pidx, node, eps_via_nodes)
|
||||
}
|
||||
if (naccepted == 0)
|
||||
{
|
||||
re_node_set_insert (eps_via_nodes, node);
|
||||
err = re_node_set_insert (eps_via_nodes, node);
|
||||
if (err < 0)
|
||||
return -1;
|
||||
dest_node = dfa->nexts[node];
|
||||
if (re_node_set_contains (&state_log[*pidx]->nodes, dest_node))
|
||||
return dest_node;
|
||||
@ -885,7 +937,7 @@ proceed_next_node (preg, state_log, mctx, input, pidx, node, eps_via_nodes)
|
||||
Note: We assume that pmatch[0] is already set, and
|
||||
pmatch[i].rm_so == pmatch[i].rm_eo == -1 (i > 1). */
|
||||
|
||||
static void
|
||||
static reg_errcode_t
|
||||
set_regs (preg, state_log, mctx, input, nmatch, pmatch, last_node)
|
||||
const regex_t *preg;
|
||||
re_dfastate_t **state_log;
|
||||
@ -944,9 +996,11 @@ set_regs (preg, state_log, mctx, input, nmatch, pmatch, last_node)
|
||||
/* Proceed to next node. */
|
||||
cur_node = proceed_next_node (preg, state_log, mctx, input, &idx,
|
||||
cur_node, &eps_via_nodes);
|
||||
if (cur_node < 0)
|
||||
return REG_ESPACE;
|
||||
}
|
||||
re_node_set_free (&eps_via_nodes);
|
||||
return;
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
||||
#define NUMBER_OF_STATE 1
|
||||
@ -974,7 +1028,7 @@ set_regs (preg, state_log, mctx, input, nmatch, pmatch, last_node)
|
||||
#define STATE_NODE_CONTAINS(state,node) \
|
||||
((state) != NULL && re_node_set_contains (&(state)->nodes, node))
|
||||
|
||||
static void
|
||||
static reg_errcode_t
|
||||
sift_states_backward (preg, state_log, mctx, input, last_node)
|
||||
const regex_t *preg;
|
||||
re_dfastate_t **state_log;
|
||||
@ -982,6 +1036,7 @@ sift_states_backward (preg, state_log, mctx, input, last_node)
|
||||
const re_string_t *input;
|
||||
int last_node;
|
||||
{
|
||||
reg_errcode_t err;
|
||||
re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
|
||||
re_node_set state_buf;
|
||||
int str_idx = mctx->match_last;
|
||||
@ -990,18 +1045,28 @@ sift_states_backward (preg, state_log, mctx, input, last_node)
|
||||
#ifdef DEBUG
|
||||
assert (state_log != NULL && state_log[str_idx] != NULL);
|
||||
#endif
|
||||
re_node_set_alloc (&state_buf, NUMBER_OF_STATE);
|
||||
err = re_node_set_alloc (&state_buf, NUMBER_OF_STATE);
|
||||
if (err != REG_NOERROR)
|
||||
return err;
|
||||
plog = &state_log[str_idx]->nodes;
|
||||
|
||||
/* Build sifted state_log[str_idx]. It has the nodes which can epsilon
|
||||
transit to the last_node and the last_node itself. */
|
||||
re_node_set_intersect (&state_buf, plog, dfa->inveclosures + last_node);
|
||||
err = re_node_set_intersect (&state_buf, plog, dfa->inveclosures + last_node);
|
||||
if (err != REG_NOERROR)
|
||||
return err;
|
||||
|
||||
if (state_log[str_idx] != NULL && state_log[str_idx]->has_backref)
|
||||
add_epsilon_backreference (dfa, mctx, plog, str_idx, &state_buf);
|
||||
{
|
||||
err = add_epsilon_backreference (dfa, mctx, plog, str_idx, &state_buf);
|
||||
if (err != REG_NOERROR)
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Update state log. */
|
||||
state_log[str_idx] = re_acquire_state (dfa, &state_buf);
|
||||
state_log[str_idx] = re_acquire_state (&err, dfa, &state_buf);
|
||||
if (state_log[str_idx] == NULL && err != REG_NOERROR)
|
||||
return err;
|
||||
|
||||
/* Then check each states in the state_log. */
|
||||
while (str_idx > mctx->match_first)
|
||||
@ -1062,17 +1127,26 @@ sift_states_backward (preg, state_log, mctx, input, last_node)
|
||||
|
||||
/* `prev_node' may point the entity of the OP_CONTEXT_NODE,
|
||||
then we use plog->elems[i] instead. */
|
||||
re_node_set_add_intersect (&state_buf, plog,
|
||||
err = re_node_set_add_intersect (&state_buf, plog,
|
||||
dfa->inveclosures + prev_node);
|
||||
if (err != REG_NOERROR)
|
||||
return err;
|
||||
}
|
||||
if (state_log[str_idx] != NULL && state_log[str_idx]->has_backref)
|
||||
add_epsilon_backreference (dfa, mctx, plog, str_idx, &state_buf);
|
||||
{
|
||||
err = add_epsilon_backreference (dfa, mctx, plog, str_idx, &state_buf);
|
||||
if (err != REG_NOERROR)
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Update state_log. */
|
||||
state_log[str_idx] = re_acquire_state (dfa, &state_buf);
|
||||
state_log[str_idx] = re_acquire_state (&err, dfa, &state_buf);
|
||||
if (state_log[str_idx] == NULL && err != REG_NOERROR)
|
||||
return err;
|
||||
}
|
||||
|
||||
re_node_set_free (&state_buf);
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
||||
/* Helper functions. */
|
||||
@ -1136,7 +1210,7 @@ sift_states_iter_bkref (dfa, state_log, mctx_entry, node_idx, idx, match_first,
|
||||
return naccepted;
|
||||
}
|
||||
|
||||
static void
|
||||
static reg_errcode_t
|
||||
add_epsilon_backreference (dfa, mctx, plog, idx, state_buf)
|
||||
const re_dfa_t *dfa;
|
||||
const re_match_context_t *mctx;
|
||||
@ -1164,12 +1238,16 @@ add_epsilon_backreference (dfa, mctx, plog, idx, state_buf)
|
||||
}
|
||||
if (j < mctx->nbkref_ents || idx == mctx->match_first)
|
||||
{
|
||||
re_node_set_add_intersect (state_buf, plog,
|
||||
reg_errcode_t err;
|
||||
err = re_node_set_add_intersect (state_buf, plog,
|
||||
dfa->inveclosures + node_idx);
|
||||
if (err != REG_NOERROR)
|
||||
return err;
|
||||
i = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
||||
/* Functions for state transition. */
|
||||
@ -1180,7 +1258,8 @@ add_epsilon_backreference (dfa, mctx, plog, idx, state_buf)
|
||||
update the destination of STATE_LOG. */
|
||||
|
||||
static re_dfastate_t *
|
||||
transit_state (preg, state, input, fl_search, state_log, mctx)
|
||||
transit_state (err, preg, state, input, fl_search, state_log, mctx)
|
||||
reg_errcode_t *err;
|
||||
const regex_t *preg;
|
||||
re_dfastate_t *state, **state_log;
|
||||
re_string_t *input;
|
||||
@ -1191,6 +1270,7 @@ transit_state (preg, state, input, fl_search, state_log, mctx)
|
||||
re_dfastate_t **trtable, *next_state;
|
||||
unsigned char ch;
|
||||
|
||||
*err = REG_NOERROR;
|
||||
if (state == NULL)
|
||||
{
|
||||
next_state = state;
|
||||
@ -1200,7 +1280,11 @@ transit_state (preg, state, input, fl_search, state_log, mctx)
|
||||
{
|
||||
/* If the current state can accept multibyte. */
|
||||
if (state->accept_mb)
|
||||
transit_state_mb (preg, state, input, state_log, mctx);
|
||||
{
|
||||
*err = transit_state_mb (preg, state, input, state_log, mctx);
|
||||
if (*err != REG_NOERROR)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Then decide the next state with the single byte. */
|
||||
if (1)
|
||||
@ -1221,7 +1305,10 @@ transit_state (preg, state, input, fl_search, state_log, mctx)
|
||||
else
|
||||
{
|
||||
/* don't use transition table */
|
||||
next_state = transit_state_sb (preg, state, input, fl_search, mctx);
|
||||
next_state = transit_state_sb (err, preg, state, input, fl_search,
|
||||
mctx);
|
||||
if (next_state == NULL && err != REG_NOERROR)
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1252,7 +1339,10 @@ transit_state (preg, state, input, fl_search, state_log, mctx)
|
||||
if (next_state != NULL)
|
||||
{
|
||||
table_nodes = next_state->entrance_nodes;
|
||||
re_node_set_init_union (&next_nodes, table_nodes, log_nodes);
|
||||
*err = re_node_set_init_union (&next_nodes, table_nodes,
|
||||
log_nodes);
|
||||
if (*err != REG_NOERROR)
|
||||
return NULL;
|
||||
}
|
||||
else
|
||||
next_nodes = *log_nodes;
|
||||
@ -1262,14 +1352,19 @@ transit_state (preg, state, input, fl_search, state_log, mctx)
|
||||
context = re_string_context_at (input, re_string_cur_idx (input) - 1,
|
||||
mctx->eflags, preg->newline_anchor);
|
||||
next_state = state_log[cur_idx]
|
||||
= re_acquire_state_context (dfa, &next_nodes, context);
|
||||
= re_acquire_state_context (err, dfa, &next_nodes, context);
|
||||
/* We don't need to check errors here, since the return value of
|
||||
this function is next_state and ERR is already set. */
|
||||
|
||||
if (table_nodes != NULL)
|
||||
re_node_set_free (&next_nodes);
|
||||
}
|
||||
/* If the next state has back references. */
|
||||
if (next_state != NULL && next_state->has_backref)
|
||||
{
|
||||
transit_state_bkref (preg, next_state, input, state_log, mctx);
|
||||
*err = transit_state_bkref (preg, next_state, input, state_log, mctx);
|
||||
if (*err != REG_NOERROR)
|
||||
return NULL;
|
||||
next_state = state_log[cur_idx];
|
||||
}
|
||||
}
|
||||
@ -1282,7 +1377,8 @@ transit_state (preg, state, input, fl_search, state_log, mctx)
|
||||
accepting the current input byte. */
|
||||
|
||||
static re_dfastate_t *
|
||||
transit_state_sb (preg, state, input, fl_search, mctx)
|
||||
transit_state_sb (err, preg, state, input, fl_search, mctx)
|
||||
reg_errcode_t *err;
|
||||
const regex_t *preg;
|
||||
re_dfastate_t *state;
|
||||
re_string_t *input;
|
||||
@ -1295,14 +1391,20 @@ transit_state_sb (preg, state, input, fl_search, mctx)
|
||||
int node_cnt, cur_str_idx = re_string_cur_idx (input);
|
||||
unsigned int context;
|
||||
|
||||
re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
|
||||
*err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
|
||||
if (*err != REG_NOERROR)
|
||||
return NULL;
|
||||
for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
|
||||
{
|
||||
int cur_node = state->nodes.elems[node_cnt];
|
||||
if (check_node_accept (preg, dfa->nodes + cur_node, input,
|
||||
cur_str_idx, mctx->eflags))
|
||||
re_node_set_merge (&next_nodes,
|
||||
{
|
||||
*err = re_node_set_merge (&next_nodes,
|
||||
dfa->eclosures + dfa->nexts[cur_node]);
|
||||
if (*err != REG_NOERROR)
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
if (fl_search)
|
||||
{
|
||||
@ -1317,23 +1419,32 @@ transit_state_sb (preg, state, input, fl_search, mctx)
|
||||
}
|
||||
if (!not_initial)
|
||||
#endif
|
||||
re_node_set_merge (&next_nodes, dfa->init_state->entrance_nodes);
|
||||
{
|
||||
*err = re_node_set_merge (&next_nodes,
|
||||
dfa->init_state->entrance_nodes);
|
||||
if (*err != REG_NOERROR)
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
context = re_string_context_at (input, cur_str_idx, mctx->eflags,
|
||||
preg->newline_anchor);
|
||||
next_state = re_acquire_state_context (dfa, &next_nodes, context);
|
||||
next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
|
||||
/* We don't need to check errors here, since the return value of
|
||||
this function is next_state and ERR is already set. */
|
||||
|
||||
re_node_set_free (&next_nodes);
|
||||
re_string_skip_bytes (input, 1);
|
||||
return next_state;
|
||||
}
|
||||
|
||||
static void
|
||||
static reg_errcode_t
|
||||
transit_state_mb (preg, pstate, input, state_log, mctx)
|
||||
const regex_t *preg;
|
||||
re_dfastate_t *pstate, **state_log;
|
||||
const re_string_t *input;
|
||||
re_match_context_t *mctx;
|
||||
{
|
||||
reg_errcode_t err;
|
||||
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
|
||||
int i;
|
||||
|
||||
@ -1376,39 +1487,50 @@ transit_state_mb (preg, pstate, input, state_log, mctx)
|
||||
if (dest_state == NULL)
|
||||
dest_nodes = *new_nodes;
|
||||
else
|
||||
re_node_set_init_union (&dest_nodes, dest_state->entrance_nodes,
|
||||
new_nodes);
|
||||
{
|
||||
err = re_node_set_init_union (&dest_nodes,
|
||||
dest_state->entrance_nodes, new_nodes);
|
||||
if (err != REG_NOERROR)
|
||||
return err;
|
||||
}
|
||||
context = re_string_context_at (input, dest_idx - 1, mctx->eflags,
|
||||
preg->newline_anchor);
|
||||
state_log[dest_idx] = re_acquire_state_context (dfa, &dest_nodes, context);
|
||||
state_log[dest_idx] = re_acquire_state_context (&err, dfa, &dest_nodes,
|
||||
context);
|
||||
if (state_log[dest_idx] == NULL && err != REG_NOERROR)
|
||||
return err;
|
||||
if (dest_state != NULL)
|
||||
re_node_set_free (&dest_nodes);
|
||||
}
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
||||
static void
|
||||
static reg_errcode_t
|
||||
transit_state_bkref (preg, pstate, input, state_log, mctx)
|
||||
const regex_t *preg;
|
||||
re_dfastate_t *pstate, **state_log;
|
||||
const re_string_t *input;
|
||||
re_match_context_t *mctx;
|
||||
{
|
||||
reg_errcode_t err;
|
||||
re_dfastate_t **work_state_log;
|
||||
|
||||
#ifdef DEBUG
|
||||
assert (mctx->match_first != -1);
|
||||
#endif
|
||||
work_state_log = re_malloc (re_dfastate_t *, re_string_cur_idx (input) + 1);
|
||||
if (work_state_log == NULL)
|
||||
return REG_ESPACE;
|
||||
|
||||
transit_state_bkref_loop (preg, input, &pstate->nodes, work_state_log,
|
||||
err = transit_state_bkref_loop (preg, input, &pstate->nodes, work_state_log,
|
||||
state_log, mctx);
|
||||
|
||||
re_free (work_state_log);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Caller must allocate `work_state_log'. */
|
||||
|
||||
static void
|
||||
static reg_errcode_t
|
||||
transit_state_bkref_loop (preg, input, nodes, work_state_log, state_log, mctx)
|
||||
const regex_t *preg;
|
||||
const re_string_t *input;
|
||||
@ -1416,10 +1538,13 @@ transit_state_bkref_loop (preg, input, nodes, work_state_log, state_log, mctx)
|
||||
re_dfastate_t **work_state_log, **state_log;
|
||||
re_match_context_t *mctx;
|
||||
{
|
||||
reg_errcode_t err;
|
||||
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
|
||||
int i, j;
|
||||
regmatch_t *cur_regs = re_malloc (regmatch_t, preg->re_nsub + 1);
|
||||
int cur_str_idx = re_string_cur_idx (input);
|
||||
if (cur_regs == NULL)
|
||||
return REG_ESPACE;
|
||||
|
||||
for (i = 0; i < nodes->nelem; ++i)
|
||||
{
|
||||
@ -1474,7 +1599,9 @@ transit_state_bkref_loop (preg, input, nodes, work_state_log, state_log, mctx)
|
||||
|
||||
/* Successfully matched, add a new cache entry. */
|
||||
dest_str_idx = cur_str_idx + subexp_len;
|
||||
match_ctx_add_entry (mctx, node_idx, cur_str_idx, dest_str_idx);
|
||||
err = match_ctx_add_entry (mctx, node_idx, cur_str_idx, dest_str_idx);
|
||||
if (err != REG_NOERROR)
|
||||
return err;
|
||||
clean_state_log_if_need (state_log, mctx, dest_str_idx);
|
||||
|
||||
/* And add the epsilon closures (which is `new_dest_nodes') of
|
||||
@ -1494,29 +1621,44 @@ transit_state_bkref_loop (preg, input, nodes, work_state_log, state_log, mctx)
|
||||
: state_log[cur_str_idx]->nodes.nelem);
|
||||
/* Add `new_dest_node' to state_log. */
|
||||
if (dest_state == NULL)
|
||||
state_log[dest_str_idx] = re_acquire_state_context (dfa,
|
||||
{
|
||||
state_log[dest_str_idx] = re_acquire_state_context (&err, dfa,
|
||||
new_dest_nodes,
|
||||
context);
|
||||
if (state_log[dest_str_idx] == NULL && err != REG_NOERROR)
|
||||
return err;
|
||||
}
|
||||
else
|
||||
{
|
||||
re_node_set dest_nodes;
|
||||
re_node_set_init_union (&dest_nodes, dest_state->entrance_nodes,
|
||||
err = re_node_set_init_union (&dest_nodes, dest_state->entrance_nodes,
|
||||
new_dest_nodes);
|
||||
state_log[dest_str_idx] = re_acquire_state_context (dfa, &dest_nodes,
|
||||
if (err != REG_NOERROR)
|
||||
return err;
|
||||
state_log[dest_str_idx] = re_acquire_state_context (&err, dfa,
|
||||
&dest_nodes,
|
||||
context);
|
||||
if (state_log[dest_str_idx] == NULL && err != REG_NOERROR)
|
||||
return err;
|
||||
re_node_set_free (&dest_nodes);
|
||||
}
|
||||
|
||||
/* We need to check recursively if the backreference can epsilon
|
||||
transit. */
|
||||
if (subexp_len == 0 && state_log[cur_str_idx]->nodes.nelem > prev_nelem)
|
||||
transit_state_bkref_loop (preg, input, new_dest_nodes, work_state_log,
|
||||
state_log, mctx);
|
||||
{
|
||||
err = transit_state_bkref_loop (preg, input, new_dest_nodes,
|
||||
work_state_log, state_log, mctx);
|
||||
if (err != REG_NOERROR)
|
||||
return err;
|
||||
}
|
||||
}
|
||||
re_free (cur_regs);
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
||||
/* Build transition table for the state. */
|
||||
/* Build transition table for the state.
|
||||
Return the new table if succeeded, otherwise return NULL. */
|
||||
|
||||
static re_dfastate_t **
|
||||
build_trtable (preg, state, fl_search)
|
||||
@ -1524,6 +1666,7 @@ build_trtable (preg, state, fl_search)
|
||||
const re_dfastate_t *state;
|
||||
int fl_search;
|
||||
{
|
||||
reg_errcode_t err;
|
||||
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
|
||||
int i, j, k, ch;
|
||||
int ndests; /* Number of the destination states from `state'. */
|
||||
@ -1541,15 +1684,18 @@ build_trtable (preg, state, fl_search)
|
||||
|
||||
/* Initialize transiton table. */
|
||||
trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
|
||||
if (dests_node == NULL || dests_ch == NULL || trtable == NULL)
|
||||
return NULL;
|
||||
|
||||
/* At first, group all nodes belonging to `state' into several
|
||||
destinations. */
|
||||
ndests = group_nodes_into_DFAstates (preg, state, dests_node, dests_ch);
|
||||
if (ndests == 0)
|
||||
if (ndests <= 0)
|
||||
{
|
||||
re_free (dests_node);
|
||||
re_free (dests_ch);
|
||||
return trtable;
|
||||
/* Return NULL in case of an error, trtable otherwise. */
|
||||
return (ndests < 0) ? NULL : trtable;
|
||||
}
|
||||
|
||||
dest_states = re_malloc (re_dfastate_t *, ndests);
|
||||
@ -1557,7 +1703,11 @@ build_trtable (preg, state, fl_search)
|
||||
dest_states_nl = re_malloc (re_dfastate_t *, ndests);
|
||||
bitset_empty (acceptable);
|
||||
|
||||
re_node_set_alloc (&follows, ndests + 1);
|
||||
err = re_node_set_alloc (&follows, ndests + 1);
|
||||
if (dest_states == NULL || dest_states_word == NULL || dest_states_nl == NULL
|
||||
|| err != REG_NOERROR)
|
||||
return NULL;
|
||||
|
||||
/* Then build the states for all destinations. */
|
||||
for (i = 0; i < ndests; ++i)
|
||||
{
|
||||
@ -1569,7 +1719,9 @@ build_trtable (preg, state, fl_search)
|
||||
next_node = dfa->nexts[dests_node[i].elems[j]];
|
||||
if (next_node != -1)
|
||||
{
|
||||
re_node_set_merge (&follows, dfa->eclosures + next_node);
|
||||
err = re_node_set_merge (&follows, dfa->eclosures + next_node);
|
||||
if (err != REG_NOERROR)
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
/* If search flag is set, merge the initial state. */
|
||||
@ -1585,17 +1737,28 @@ build_trtable (preg, state, fl_search)
|
||||
}
|
||||
if (!not_initial)
|
||||
#endif
|
||||
re_node_set_merge (&follows, dfa->init_state->entrance_nodes);
|
||||
{
|
||||
err = re_node_set_merge (&follows,
|
||||
dfa->init_state->entrance_nodes);
|
||||
if (err != REG_NOERROR)
|
||||
return NULL;
|
||||
}
|
||||
dest_states[i] = re_acquire_state_context (dfa, &follows, 0);
|
||||
}
|
||||
dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
|
||||
if (dest_states[i] == NULL && err != REG_NOERROR)
|
||||
return NULL;
|
||||
/* If the new state has context constraint,
|
||||
build appropriate states for these contexts. */
|
||||
if (dest_states[i]->has_constraint)
|
||||
{
|
||||
dest_states_word[i] = re_acquire_state_context (dfa, &follows,
|
||||
dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
|
||||
CONTEXT_WORD);
|
||||
dest_states_nl[i] = re_acquire_state_context (dfa, &follows,
|
||||
if (dest_states_word[i] == NULL && err != REG_NOERROR)
|
||||
return NULL;
|
||||
dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
|
||||
CONTEXT_NEWLINE);
|
||||
if (dest_states_nl[i] == NULL && err != REG_NOERROR)
|
||||
return NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1654,6 +1817,7 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
|
||||
re_node_set *dests_node;
|
||||
bitset *dests_ch;
|
||||
{
|
||||
reg_errcode_t err;
|
||||
const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
|
||||
int i, j, k;
|
||||
int ndests; /* Number of the destinations from `state'. */
|
||||
@ -1750,12 +1914,16 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
|
||||
{
|
||||
bitset_copy (dests_ch[ndests], remains);
|
||||
bitset_copy (dests_ch[j], intersec);
|
||||
re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
|
||||
err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
|
||||
if (err != REG_NOERROR)
|
||||
return -1;
|
||||
++ndests;
|
||||
}
|
||||
|
||||
/* Put the position in the current group. */
|
||||
re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
|
||||
err = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
|
||||
if (err < 0)
|
||||
return -1;
|
||||
|
||||
/* If all characters are consumed, go to next node. */
|
||||
if (!not_consumed)
|
||||
@ -1765,7 +1933,9 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
|
||||
if (j == ndests)
|
||||
{
|
||||
bitset_copy (dests_ch[ndests], accepts);
|
||||
re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
|
||||
err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
|
||||
if (err != REG_NOERROR)
|
||||
return -1;
|
||||
++ndests;
|
||||
bitset_empty (accepts);
|
||||
}
|
||||
@ -2028,7 +2198,7 @@ check_node_accept (preg, node, input, idx, eflags)
|
||||
|
||||
/* Functions for matching context. */
|
||||
|
||||
static void
|
||||
static reg_errcode_t
|
||||
match_ctx_init (mctx, eflags, n)
|
||||
re_match_context_t *mctx;
|
||||
int eflags;
|
||||
@ -2037,12 +2207,17 @@ match_ctx_init (mctx, eflags, n)
|
||||
mctx->eflags = eflags;
|
||||
mctx->match_first = mctx->match_last = -1;
|
||||
if (n > 0)
|
||||
{
|
||||
mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
|
||||
if (mctx->bkref_ents == NULL)
|
||||
return REG_ESPACE;
|
||||
}
|
||||
else
|
||||
mctx->bkref_ents = NULL;
|
||||
mctx->nbkref_ents = 0;
|
||||
mctx->abkref_ents = n;
|
||||
mctx->max_bkref_len = 0;
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
||||
static void
|
||||
@ -2054,7 +2229,7 @@ match_ctx_free (mctx)
|
||||
|
||||
/* Add a new backreference entry to the cache. */
|
||||
|
||||
static void
|
||||
static reg_errcode_t
|
||||
match_ctx_add_entry (mctx, node, from, to)
|
||||
re_match_context_t *mctx;
|
||||
int node, from, to;
|
||||
@ -2064,6 +2239,8 @@ match_ctx_add_entry (mctx, node, from, to)
|
||||
mctx->bkref_ents = re_realloc (mctx->bkref_ents,
|
||||
struct re_backref_cache_entry,
|
||||
mctx->abkref_ents * 2);
|
||||
if (mctx->bkref_ents == NULL)
|
||||
return REG_ESPACE;
|
||||
memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
|
||||
sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
|
||||
mctx->abkref_ents *= 2;
|
||||
@ -2073,4 +2250,5 @@ match_ctx_add_entry (mctx, node, from, to)
|
||||
mctx->bkref_ents[mctx->nbkref_ents++].to = to;
|
||||
if (mctx->max_bkref_len < to - from)
|
||||
mctx->max_bkref_len = to - from;
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user