
tree-ssa-loop-ivopts.c: Include tree-affine.h.

* tree-ssa-loop-ivopts.c: Include tree-affine.h.
	(divide): Removed.
	(constant_multiple_of): Fix order of operators for division.
	(aff_combination_const, aff_combination_elt, aff_combination_scale,
	aff_combination_add_elt, aff_combination_add, aff_combination_convert,
	tree_to_aff_combination, add_elt_to_tree, unshare_aff_combination,
	aff_combination_to_tree): Moved to tree-affine.c and made to work with
	double_int coefficients.
	(get_computation_aff, get_computation_at): Work with double_int
	coefficients.
	(get_computation_cost_at): Do not use divide.
	(rewrite_use_nonlinear_expr, rewrite_use_address, rewrite_use_compare):
	Assert that expressing the computation did not fail.
	* tree-ssa-address.c: Include tree-affine.h.
	(add_to_parts, most_expensive_mult_to_index, addr_to_parts,
	create_mem_ref): Work with double_int coefficients.
	* tree-affine.c: New file.
	* tree-affine.h: New file.
	* tree-flow.h (struct affine_tree_combination): Removed.
	* Makefile.in (tree-affine.o): Add.
	(tree-ssa-address.o, tree-ssa-loop-ivopts.o): Add tree-affine.h
	dependency.

From-SVN: r119854
Zdenek Dvorak 2006-12-14 03:05:20 +01:00 committed by Zdenek Dvorak
parent 904e0e974d
commit 73f30c6308
7 changed files with 628 additions and 619 deletions
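Background for reading the diff: the new tree-affine module represents an expression as offset + sum (coef[i] * elt[i]) + rest, with at most MAX_AFF_ELTS tracked elements. A minimal self-contained sketch of that invariant, using plain long values in place of GCC's tree nodes and double_int coefficients (the toy_* names below are illustrative only, not part of the commit):

/* Toy analogue of the affine combination defined in the new tree-affine.h:
   value = offset + sum (elts[i].coef * elts[i].val) + rest.
   Plain long stands in for both tree and double_int.  */
#include <stdio.h>

#define TOY_MAX_AFF_ELTS 8

struct toy_elt { long val; long coef; };

struct toy_aff
{
  long offset;                           /* constant part */
  unsigned n;                            /* number of tracked elements */
  struct toy_elt elts[TOY_MAX_AFF_ELTS]; /* elements and their coefficients */
  long rest;                             /* remainder that did not fit */
};

/* Evaluates the combination; this is the invariant the aff_combination_*
   routines preserve.  */
static long
toy_aff_value (const struct toy_aff *comb)
{
  long value = comb->offset + comb->rest;
  unsigned i;

  for (i = 0; i < comb->n; i++)
    value += comb->elts[i].coef * comb->elts[i].val;
  return value;
}

int
main (void)
{
  /* 3 + 4*x - 2*y with x = 10, y = 7 evaluates to 29.  */
  struct toy_aff a = { 3, 2, { { 10, 4 }, { 7, -2 } }, 0 };

  printf ("%ld\n", toy_aff_value (&a));
  return 0;
}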

gcc/ChangeLog
@@ -1,3 +1,28 @@
2006-12-13 Zdenek Dvorak <dvorakz@suse.cz>
* tree-ssa-loop-ivopts.c: Include tree-affine.h.
(divide): Removed.
(constant_multiple_of): Fix order of operators for division.
(aff_combination_const, aff_combination_elt, aff_combination_scale,
aff_combination_add_elt, aff_combination_add, aff_combination_convert,
tree_to_aff_combination, add_elt_to_tree, unshare_aff_combination,
aff_combination_to_tree): Moved to tree-affine.c and made to work with
double_int coefficients.
(get_computation_aff, get_computation_at): Work with double_int
coefficients.
(get_computation_cost_at): Do not use divide.
(rewrite_use_nonlinear_expr, rewrite_use_address, rewrite_use_compare):
Assert that expressing the computation did not fail.
* tree-ssa-address.c: Include tree-affine.h.
(add_to_parts, most_expensive_mult_to_index, addr_to_parts,
create_mem_ref): Work with double_int coefficients.
* tree-affine.c: New file.
* tree-affine.h: New file.
* tree-flow.h (struct affine_tree_combination): Removed.
* Makefile.in (tree-affine.o): Add.
(tree-ssa-address.o, tree-ssa-loop-ivopts.o): Add tree-affine.h
dependency.
2006-12-13 Peter Bergner <bergner@vnet.ibm.com>
PR middle-end/30191

gcc/Makefile.in
@@ -988,7 +988,7 @@ OBJS-common = \
tree-vectorizer.o tree-vect-analyze.o tree-vect-transform.o \
tree-vect-patterns.o tree-ssa-loop-prefetch.o tree-ssa-coalesce.o \
tree-ssa-loop-ivcanon.o tree-ssa-propagate.o tree-ssa-address.o \
tree-ssa-math-opts.o \
tree-ssa-math-opts.o tree-affine.o \
tree-ssa-loop-ivopts.o tree-if-conv.o tree-ssa-loop-unswitch.o \
alias.o bb-reorder.o bitmap.o builtins.o caller-save.o calls.o \
cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o \
@@ -1992,7 +1992,7 @@ tree-ssa-address.o : tree-ssa-address.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) \
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
tree-pass.h $(FLAGS_H) $(TREE_INLINE_H) $(RECOG_H) insn-config.h $(EXPR_H) \
gt-tree-ssa-address.h $(GGC_H)
gt-tree-ssa-address.h $(GGC_H) tree-affine.h
tree-ssa-loop-niter.o : tree-ssa-loop-niter.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(PARAMS_H) \
$(TREE_INLINE_H) output.h $(DIAGNOSTIC_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
@@ -2018,7 +2018,10 @@ tree-ssa-loop-ivopts.o : tree-ssa-loop-ivopts.c $(TREE_FLOW_H) $(CONFIG_H) \
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
tree-pass.h $(GGC_H) $(RECOG_H) insn-config.h $(HASHTAB_H) $(SCEV_H) \
$(CFGLOOP_H) $(PARAMS_H) langhooks.h $(BASIC_BLOCK_H) hard-reg-set.h \
tree-chrec.h $(VARRAY_H)
tree-chrec.h $(VARRAY_H) tree-affine.h
tree-affine.o : tree-affine.c tree-affine.h $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) \
output.h $(DIAGNOSTIC_H) $(TM_H) coretypes.h $(TREE_DUMP_H)
tree-ssa-loop-manip.o : tree-ssa-loop-manip.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) \
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \

gcc/tree-affine.c (new file, 414 lines)

@@ -0,0 +1,414 @@
/* Operations with affine combinations of trees.
Copyright (C) 2005 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.
GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING. If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA. */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-dump.h"
#include "tree-affine.h"
/* Extends CST as appropriate for the affine combination COMB. */
double_int
double_int_ext_for_comb (double_int cst, aff_tree *comb)
{
return double_int_sext (cst, TYPE_PRECISION (comb->type));
}
/* Initializes affine combination COMB so that its value is zero in TYPE. */
static void
aff_combination_zero (aff_tree *comb, tree type)
{
comb->type = type;
comb->offset = double_int_zero;
comb->n = 0;
comb->rest = NULL_TREE;
}
/* Sets COMB to CST. */
void
aff_combination_const (aff_tree *comb, tree type, double_int cst)
{
aff_combination_zero (comb, type);
comb->offset = double_int_ext_for_comb (cst, comb);
}
/* Sets COMB to single element ELT. */
void
aff_combination_elt (aff_tree *comb, tree type, tree elt)
{
aff_combination_zero (comb, type);
comb->n = 1;
comb->elts[0].val = elt;
comb->elts[0].coef = double_int_one;
}
/* Scales COMB by SCALE. */
void
aff_combination_scale (aff_tree *comb, double_int scale)
{
unsigned i, j;
scale = double_int_ext_for_comb (scale, comb);
if (double_int_one_p (scale))
return;
if (double_int_zero_p (scale))
{
aff_combination_zero (comb, comb->type);
return;
}
comb->offset
= double_int_ext_for_comb (double_int_mul (scale, comb->offset), comb);
for (i = 0, j = 0; i < comb->n; i++)
{
double_int new_coef;
new_coef
= double_int_ext_for_comb (double_int_mul (scale, comb->elts[i].coef),
comb);
/* A coefficient may become zero due to overflow. Remove the zero
elements. */
if (double_int_zero_p (new_coef))
continue;
comb->elts[j].coef = new_coef;
comb->elts[j].val = comb->elts[i].val;
j++;
}
comb->n = j;
if (comb->rest)
{
if (comb->n < MAX_AFF_ELTS)
{
comb->elts[comb->n].coef = scale;
comb->elts[comb->n].val = comb->rest;
comb->rest = NULL_TREE;
comb->n++;
}
else
comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
double_int_to_tree (comb->type, scale));
}
}
/* Adds ELT * SCALE to COMB. */
void
aff_combination_add_elt (aff_tree *comb, tree elt, double_int scale)
{
unsigned i;
scale = double_int_ext_for_comb (scale, comb);
if (double_int_zero_p (scale))
return;
for (i = 0; i < comb->n; i++)
if (operand_equal_p (comb->elts[i].val, elt, 0))
{
double_int new_coef;
new_coef = double_int_add (comb->elts[i].coef, scale);
new_coef = double_int_ext_for_comb (new_coef, comb);
if (!double_int_zero_p (new_coef))
{
comb->elts[i].coef = new_coef;
return;
}
comb->n--;
comb->elts[i] = comb->elts[comb->n];
if (comb->rest)
{
gcc_assert (comb->n == MAX_AFF_ELTS - 1);
comb->elts[comb->n].coef = double_int_one;
comb->elts[comb->n].val = comb->rest;
comb->rest = NULL_TREE;
comb->n++;
}
return;
}
if (comb->n < MAX_AFF_ELTS)
{
comb->elts[comb->n].coef = scale;
comb->elts[comb->n].val = elt;
comb->n++;
return;
}
if (double_int_one_p (scale))
elt = fold_convert (comb->type, elt);
else
elt = fold_build2 (MULT_EXPR, comb->type,
fold_convert (comb->type, elt),
double_int_to_tree (comb->type, scale));
if (comb->rest)
comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
else
comb->rest = elt;
}
/* Adds COMB2 to COMB1. */
void
aff_combination_add (aff_tree *comb1, aff_tree *comb2)
{
unsigned i;
comb1->offset
= double_int_ext_for_comb (double_int_add (comb1->offset, comb2->offset),
comb1);
for (i = 0; i < comb2->n; i++)
aff_combination_add_elt (comb1, comb2->elts[i].val, comb2->elts[i].coef);
if (comb2->rest)
aff_combination_add_elt (comb1, comb2->rest, double_int_one);
}
/* Converts affine combination COMB to TYPE. */
void
aff_combination_convert (aff_tree *comb, tree type)
{
unsigned i, j;
tree comb_type = comb->type;
gcc_assert (TYPE_PRECISION (type) <= TYPE_PRECISION (comb_type));
comb->type = type;
if (comb->rest)
comb->rest = fold_convert (type, comb->rest);
if (TYPE_PRECISION (type) == TYPE_PRECISION (comb_type))
return;
comb->offset = double_int_ext_for_comb (comb->offset, comb);
for (i = j = 0; i < comb->n; i++)
{
double_int new_coef = double_int_ext_for_comb (comb->elts[i].coef, comb);
if (double_int_zero_p (new_coef))
continue;
comb->elts[j].coef = new_coef;
comb->elts[j].val = fold_convert (type, comb->elts[i].val);
j++;
}
comb->n = j;
if (comb->n < MAX_AFF_ELTS && comb->rest)
{
comb->elts[comb->n].coef = double_int_one;
comb->elts[comb->n].val = comb->rest;
comb->rest = NULL_TREE;
comb->n++;
}
}
/* Splits EXPR into an affine combination of parts. */
void
tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
{
aff_tree tmp;
enum tree_code code;
tree cst, core, toffset;
HOST_WIDE_INT bitpos, bitsize;
enum machine_mode mode;
int unsignedp, volatilep;
STRIP_NOPS (expr);
code = TREE_CODE (expr);
switch (code)
{
case INTEGER_CST:
aff_combination_const (comb, type, tree_to_double_int (expr));
return;
case PLUS_EXPR:
case MINUS_EXPR:
tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
if (code == MINUS_EXPR)
aff_combination_scale (&tmp, double_int_minus_one);
aff_combination_add (comb, &tmp);
return;
case MULT_EXPR:
cst = TREE_OPERAND (expr, 1);
if (TREE_CODE (cst) != INTEGER_CST)
break;
tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
aff_combination_scale (comb, tree_to_double_int (cst));
return;
case NEGATE_EXPR:
tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
aff_combination_scale (comb, double_int_minus_one);
return;
case ADDR_EXPR:
core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
&toffset, &mode, &unsignedp, &volatilep,
false);
if (bitpos % BITS_PER_UNIT != 0)
break;
aff_combination_const (comb, type,
uhwi_to_double_int (bitpos / BITS_PER_UNIT));
core = build_fold_addr_expr (core);
if (TREE_CODE (core) == ADDR_EXPR)
aff_combination_add_elt (comb, core, double_int_one);
else
{
tree_to_aff_combination (core, type, &tmp);
aff_combination_add (comb, &tmp);
}
if (toffset)
{
tree_to_aff_combination (toffset, type, &tmp);
aff_combination_add (comb, &tmp);
}
return;
default:
break;
}
aff_combination_elt (comb, type, expr);
}
/* Creates EXPR + ELT * SCALE in TYPE. EXPR is taken from affine
combination COMB. */
static tree
add_elt_to_tree (tree expr, tree type, tree elt, double_int scale,
aff_tree *comb)
{
enum tree_code code;
scale = double_int_ext_for_comb (scale, comb);
elt = fold_convert (type, elt);
if (double_int_one_p (scale))
{
if (!expr)
return elt;
return fold_build2 (PLUS_EXPR, type, expr, elt);
}
if (double_int_minus_one_p (scale))
{
if (!expr)
return fold_build1 (NEGATE_EXPR, type, elt);
return fold_build2 (MINUS_EXPR, type, expr, elt);
}
if (!expr)
return fold_build2 (MULT_EXPR, type, elt,
double_int_to_tree (type, scale));
if (double_int_negative_p (scale))
{
code = MINUS_EXPR;
scale = double_int_neg (scale);
}
else
code = PLUS_EXPR;
elt = fold_build2 (MULT_EXPR, type, elt,
double_int_to_tree (type, scale));
return fold_build2 (code, type, expr, elt);
}
/* Makes tree from the affine combination COMB. */
tree
aff_combination_to_tree (aff_tree *comb)
{
tree type = comb->type;
tree expr = comb->rest;
unsigned i;
double_int off, sgn;
gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
for (i = 0; i < comb->n; i++)
expr = add_elt_to_tree (expr, type, comb->elts[i].val, comb->elts[i].coef,
comb);
/* Ensure that we get x - 1, not x + (-1) or x + 0xff..f if x is
unsigned. */
if (double_int_negative_p (comb->offset))
{
off = double_int_neg (comb->offset);
sgn = double_int_minus_one;
}
else
{
off = comb->offset;
sgn = double_int_one;
}
return add_elt_to_tree (expr, type, double_int_to_tree (type, off), sgn,
comb);
}
/* Copies the tree elements of COMB to ensure that they are not shared. */
void
unshare_aff_combination (aff_tree *comb)
{
unsigned i;
for (i = 0; i < comb->n; i++)
comb->elts[i].val = unshare_expr (comb->elts[i].val);
if (comb->rest)
comb->rest = unshare_expr (comb->rest);
}
/* Remove M-th element from COMB. */
void
aff_combination_remove_elt (aff_tree *comb, unsigned m)
{
comb->n--;
if (m <= comb->n)
comb->elts[m] = comb->elts[comb->n];
if (comb->rest)
{
comb->elts[comb->n].coef = double_int_one;
comb->elts[comb->n].val = comb->rest;
comb->rest = NULL_TREE;
comb->n++;
}
}
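Every arithmetic step above is followed by double_int_ext_for_comb, so coefficients behave as values modulo 2^TYPE_PRECISION kept in sign-extended form; this is also why aff_combination_scale drops elements whose scaled coefficient wraps to zero. A small self-contained analogue on plain integers (sext_to_prec is a hypothetical stand-in for double_int_sext, not GCC code):

/* Illustrative analogue (not GCC's double_int) of the sign extension that
   double_int_ext_for_comb applies after each operation: values live modulo
   2^PREC and are kept sign-extended.  */
#include <stdio.h>

static long
sext_to_prec (unsigned long val, unsigned prec)
{
  unsigned long mask = (prec >= sizeof (val) * 8
                        ? ~0UL : (1UL << prec) - 1);

  val &= mask;
  if (prec < sizeof (val) * 8 && (val >> (prec - 1)) & 1)
    val |= ~mask;                       /* sign bit set: extend it */
  return (long) val;
}

int
main (void)
{
  unsigned prec = 8;                    /* pretend the combination's type has 8 bits */
  long coef = 64, scale = 4;

  /* 64 * 4 == 256 == 0 (mod 2^8), so the scaled coefficient vanishes; this is
     why aff_combination_scale removes elements whose new coefficient is zero.  */
  printf ("%ld\n", sext_to_prec ((unsigned long) (coef * scale), prec));
  return 0;
}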

gcc/tree-affine.h (new file, 71 lines)

@@ -0,0 +1,71 @@
/* Operations with affine combinations of trees.
Copyright (C) 2005 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.
GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING. If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA. */
/* Affine combination of trees. We keep track of at most MAX_AFF_ELTS elements
to make things simpler; this is sufficient in most cases. */
#define MAX_AFF_ELTS 8
/* Element of an affine combination. */
struct aff_comb_elt
{
/* The value of the element. */
tree val;
/* Its coefficient in the combination. */
double_int coef;
};
typedef struct affine_tree_combination
{
/* Type of the result of the combination. */
tree type;
/* Constant offset. */
double_int offset;
/* Number of elements of the combination. */
unsigned n;
/* Elements and their coefficients. Type of elements may be different from
TYPE, but their sizes must be the same (STRIP_NOPS is applied to the
elements).
The coefficients are always sign extended from the precision of TYPE
(regardless of signedness of TYPE). */
struct aff_comb_elt elts[MAX_AFF_ELTS];
/* Remainder of the expression. Usually NULL, used only if there are more
than MAX_AFF_ELTS elements. Type of REST must be TYPE. */
tree rest;
} aff_tree;
double_int double_int_ext_for_comb (double_int, aff_tree *);
void aff_combination_const (aff_tree *, tree, double_int);
void aff_combination_elt (aff_tree *, tree, tree);
void aff_combination_scale (aff_tree *, double_int);
void aff_combination_add (aff_tree *, aff_tree *);
void aff_combination_add_elt (aff_tree *, tree, double_int);
void aff_combination_remove_elt (aff_tree *, unsigned);
void aff_combination_convert (aff_tree *, tree);
void tree_to_aff_combination (tree, tree, aff_tree *);
tree aff_combination_to_tree (aff_tree *);
void unshare_aff_combination (aff_tree *);

gcc/tree-flow.h
@@ -1002,33 +1002,6 @@ extern void remove_unused_locals (void);
/* In tree-ssa-address.c */
/* Affine combination of trees. We keep track of at most MAX_AFF_ELTS elements
to make things simpler; this is sufficient in most cases. */
#define MAX_AFF_ELTS 8
struct affine_tree_combination
{
/* Type of the result of the combination. */
tree type;
/* Mask modulo that the operations are performed. */
unsigned HOST_WIDE_INT mask;
/* Constant offset. */
unsigned HOST_WIDE_INT offset;
/* Number of elements of the combination. */
unsigned n;
/* Elements and their coefficients. */
tree elts[MAX_AFF_ELTS];
unsigned HOST_WIDE_INT coefs[MAX_AFF_ELTS];
/* Remainder of the expression. */
tree rest;
};
/* Description of a memory address. */
struct mem_address
@@ -1036,6 +1009,7 @@ struct mem_address
tree symbol, base, index, step, offset;
};
struct affine_tree_combination;
tree create_mem_ref (block_stmt_iterator *, tree,
struct affine_tree_combination *);
rtx addr_for_mem_ref (struct mem_address *, bool);

gcc/tree-ssa-address.c
@@ -42,6 +42,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
#include "recog.h"
#include "expr.h"
#include "ggc.h"
#include "tree-affine.h"
/* TODO -- handling of symbols (according to Richard Hendersons
comments, http://gcc.gnu.org/ml/gcc-patches/2005-04/msg00949.html):
@@ -346,25 +347,20 @@ fixed_address_object_p (tree obj)
construct. */
static void
add_to_parts (struct mem_address *parts, tree type, tree elt,
unsigned HOST_WIDE_INT coef)
add_to_parts (struct mem_address *parts, tree type, tree elt)
{
tree elt_core = elt;
STRIP_NOPS (elt_core);
/* Check if this is a symbol. */
if (!parts->symbol
&& coef == 1
&& TREE_CODE (elt) == ADDR_EXPR
&& fixed_address_object_p (TREE_OPERAND (elt, 0)))
&& TREE_CODE (elt_core) == ADDR_EXPR
&& fixed_address_object_p (TREE_OPERAND (elt_core, 0)))
{
parts->symbol = TREE_OPERAND (elt, 0);
parts->symbol = TREE_OPERAND (elt_core, 0);
return;
}
if (coef != 1)
elt = fold_build2 (MULT_EXPR, type, fold_convert (type, elt),
build_int_cst_type (type, coef));
else
elt = fold_convert (type, elt);
if (!parts->base)
{
parts->base = elt;
@@ -388,52 +384,69 @@ add_to_parts (struct mem_address *parts, tree type, tree elt,
static void
most_expensive_mult_to_index (struct mem_address *parts, tree type,
struct affine_tree_combination *addr)
aff_tree *addr)
{
unsigned HOST_WIDE_INT best_mult = 0;
HOST_WIDE_INT coef;
double_int best_mult, amult, amult_neg;
unsigned best_mult_cost = 0, acost;
tree mult_elt = NULL_TREE, elt;
unsigned i, j;
enum tree_code op_code;
best_mult = double_int_zero;
for (i = 0; i < addr->n; i++)
{
/* FIXME: Should use the correct memory mode rather than Pmode. */
if (addr->coefs[i] == 1
|| !multiplier_allowed_in_address_p (addr->coefs[i], Pmode))
if (!double_int_fits_in_shwi_p (addr->elts[i].coef))
continue;
acost = multiply_by_cost (addr->coefs[i], Pmode);
/* FIXME: Should use the correct memory mode rather than Pmode. */
coef = double_int_to_shwi (addr->elts[i].coef);
if (coef == 1
|| !multiplier_allowed_in_address_p (coef, Pmode))
continue;
acost = multiply_by_cost (coef, Pmode);
if (acost > best_mult_cost)
{
best_mult_cost = acost;
best_mult = addr->coefs[i];
best_mult = addr->elts[i].coef;
}
}
if (!best_mult)
if (!best_mult_cost)
return;
/* Collect elements multiplied by best_mult. */
for (i = j = 0; i < addr->n; i++)
{
if (addr->coefs[i] != best_mult)
amult = addr->elts[i].coef;
amult_neg = double_int_ext_for_comb (double_int_neg (amult), addr);
if (double_int_equal_p (amult, best_mult))
op_code = PLUS_EXPR;
else if (double_int_equal_p (amult_neg, best_mult))
op_code = MINUS_EXPR;
else
{
addr->coefs[j] = addr->coefs[i];
addr->elts[j] = addr->elts[i];
j++;
continue;
}
elt = fold_convert (type, addr->elts[i]);
if (!mult_elt)
elt = fold_convert (type, addr->elts[i].val);
if (mult_elt)
mult_elt = fold_build2 (op_code, type, mult_elt, elt);
else if (op_code == PLUS_EXPR)
mult_elt = elt;
else
mult_elt = fold_build2 (PLUS_EXPR, type, mult_elt, elt);
mult_elt = fold_build1 (NEGATE_EXPR, type, elt);
}
addr->n = j;
parts->index = mult_elt;
parts->step = build_int_cst_type (type, best_mult);
parts->step = double_int_to_tree (type, best_mult);
}
/* Splits address ADDR into PARTS.
@@ -442,13 +455,13 @@ most_expensive_mult_to_index (struct mem_address *parts, tree type,
to PARTS. Some architectures do not support anything but single
register in address, possibly with a small integer offset; while
create_mem_ref will simplify the address to an acceptable shape
later, it would be a small bit more efficient to know that asking
for complicated addressing modes is useless. */
later, it would be more efficient to know that asking for complicated
addressing modes is useless. */
static void
addr_to_parts (struct affine_tree_combination *addr, tree type,
struct mem_address *parts)
addr_to_parts (aff_tree *addr, tree type, struct mem_address *parts)
{
tree part;
unsigned i;
parts->symbol = NULL_TREE;
@@ -456,8 +469,8 @@ addr_to_parts (struct affine_tree_combination *addr, tree type,
parts->index = NULL_TREE;
parts->step = NULL_TREE;
if (addr->offset)
parts->offset = build_int_cst_type (type, addr->offset);
if (!double_int_zero_p (addr->offset))
parts->offset = double_int_to_tree (type, addr->offset);
else
parts->offset = NULL_TREE;
@@ -467,9 +480,15 @@ addr_to_parts (struct affine_tree_combination *addr, tree type,
/* Then try to process the remaining elements. */
for (i = 0; i < addr->n; i++)
add_to_parts (parts, type, addr->elts[i], addr->coefs[i]);
{
part = fold_convert (type, addr->elts[i].val);
if (!double_int_one_p (addr->elts[i].coef))
part = fold_build2 (MULT_EXPR, type, part,
double_int_to_tree (type, addr->elts[i].coef));
add_to_parts (parts, type, part);
}
if (addr->rest)
add_to_parts (parts, type, addr->rest, 1);
add_to_parts (parts, type, addr->rest);
}
/* Force the PARTS to register. */
@@ -490,8 +509,7 @@ gimplify_mem_ref_parts (block_stmt_iterator *bsi, struct mem_address *parts)
of created memory reference. */
tree
create_mem_ref (block_stmt_iterator *bsi, tree type,
struct affine_tree_combination *addr)
create_mem_ref (block_stmt_iterator *bsi, tree type, aff_tree *addr)
{
tree mem_ref, tmp;
tree addr_type = build_pointer_type (type);

gcc/tree-ssa-loop-ivopts.c
@@ -89,6 +89,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
/* The infinite cost. */
#define INFTY 10000000
@@ -524,57 +525,6 @@ name_info (struct ivopts_data *data, tree name)
return ver_info (data, SSA_NAME_VERSION (name));
}
/* Checks whether there exists number X such that X * B = A, counting modulo
2^BITS. */
static bool
divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
HOST_WIDE_INT *x)
{
unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
unsigned HOST_WIDE_INT inv, ex, val;
unsigned i;
a &= mask;
b &= mask;
/* First divide the whole equation by 2 as long as possible. */
while (!(a & 1) && !(b & 1))
{
a >>= 1;
b >>= 1;
bits--;
mask >>= 1;
}
if (!(b & 1))
{
/* If b is still even, a is odd and there is no such x. */
return false;
}
/* Find the inverse of b. We compute it as
b^(2^(bits - 1) - 1) (mod 2^bits). */
inv = 1;
ex = b;
for (i = 0; i < bits - 1; i++)
{
inv = (inv * ex) & mask;
ex = (ex * ex) & mask;
}
val = (a * inv) & mask;
gcc_assert (((val * b) & mask) == a);
if ((val >> (bits - 1)) & 1)
val |= ~mask;
*x = val;
return true;
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
emitted in LOOP. */
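The divide function deleted above found X with X * B = A modulo 2^BITS by computing the inverse of an odd B as B^(2^(BITS-1) - 1) mod 2^BITS via repeated squaring; the double_int-based constant_multiple_of below takes over this job. A standalone sketch of that inverse computation (inverse_mod_pow2 is a hypothetical name, shown only to illustrate the retired approach):

/* For odd B, its inverse modulo 2^BITS is B^(2^(BITS-1) - 1), computed by
   repeated squaring; this mirrors the loop in the deleted divide ().  */
#include <assert.h>
#include <stdio.h>

static unsigned long
inverse_mod_pow2 (unsigned long b, unsigned bits)
{
  unsigned long mask = (bits >= sizeof (b) * 8
                        ? ~0UL : (1UL << bits) - 1);
  unsigned long inv = 1, ex = b & mask;
  unsigned i;

  assert (b & 1);               /* only odd numbers are invertible mod 2^BITS */
  for (i = 0; i < bits - 1; i++)
    {
      inv = (inv * ex) & mask;  /* accumulates b^(2^0 + 2^1 + ... + 2^i) */
      ex = (ex * ex) & mask;
    }
  return inv;
}

int
main (void)
{
  unsigned bits = 16;
  unsigned long b = 12345;      /* odd */
  unsigned long inv = inverse_mod_pow2 (b, bits);

  printf ("%lu\n", (b * inv) & ((1UL << bits) - 1));    /* prints 1 */
  return 0;
}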
@@ -2613,8 +2563,8 @@ constant_multiple_of (tree top, tree bot, double_int *mul)
if (TREE_CODE (bot) != INTEGER_CST)
return false;
p0 = double_int_sext (tree_to_double_int (bot), precision);
p1 = double_int_sext (tree_to_double_int (top), precision);
p0 = double_int_sext (tree_to_double_int (top), precision);
p1 = double_int_sext (tree_to_double_int (bot), precision);
if (double_int_zero_p (p1))
return false;
*mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
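The operand-order fix matters because the question is whether TOP is a constant multiple of BOT, so TOP must be divided by BOT and the remainder checked, not the reverse. A minimal sketch of the intended check, with plain longs standing in for double_int (constant_multiple_p is a hypothetical helper, not GCC's constant_multiple_of):

/* TOP is a constant multiple of BOT iff dividing TOP by BOT (not BOT by TOP,
   as the old operand order did) leaves no remainder.  */
#include <stdbool.h>
#include <stdio.h>

static bool
constant_multiple_p (long top, long bot, long *mul)
{
  if (bot == 0 || top % bot != 0)
    return false;
  *mul = top / bot;
  return true;
}

int
main (void)
{
  long mul;

  printf ("%d\n", constant_multiple_p (8, 2, &mul) && mul == 4);    /* 1 */
  printf ("%d\n", constant_multiple_p (2, 8, &mul));                /* 0 */
  return 0;
}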
@@ -2626,354 +2576,6 @@ constant_multiple_of (tree top, tree bot, double_int *mul)
}
}
/* Sets COMB to CST. */
static void
aff_combination_const (struct affine_tree_combination *comb, tree type,
unsigned HOST_WIDE_INT cst)
{
unsigned prec = TYPE_PRECISION (type);
comb->type = type;
comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
comb->n = 0;
comb->rest = NULL_TREE;
comb->offset = cst & comb->mask;
}
/* Sets COMB to single element ELT. */
static void
aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
{
unsigned prec = TYPE_PRECISION (type);
comb->type = type;
comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
comb->n = 1;
comb->elts[0] = elt;
comb->coefs[0] = 1;
comb->rest = NULL_TREE;
comb->offset = 0;
}
/* Scales COMB by SCALE. */
static void
aff_combination_scale (struct affine_tree_combination *comb,
unsigned HOST_WIDE_INT scale)
{
unsigned i, j;
if (scale == 1)
return;
if (scale == 0)
{
aff_combination_const (comb, comb->type, 0);
return;
}
comb->offset = (scale * comb->offset) & comb->mask;
for (i = 0, j = 0; i < comb->n; i++)
{
comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
comb->elts[j] = comb->elts[i];
if (comb->coefs[j] != 0)
j++;
}
comb->n = j;
if (comb->rest)
{
if (comb->n < MAX_AFF_ELTS)
{
comb->coefs[comb->n] = scale;
comb->elts[comb->n] = comb->rest;
comb->rest = NULL_TREE;
comb->n++;
}
else
comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
build_int_cst_type (comb->type, scale));
}
}
/* Adds ELT * SCALE to COMB. */
static void
aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
unsigned HOST_WIDE_INT scale)
{
unsigned i;
if (scale == 0)
return;
for (i = 0; i < comb->n; i++)
if (operand_equal_p (comb->elts[i], elt, 0))
{
comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
if (comb->coefs[i])
return;
comb->n--;
comb->coefs[i] = comb->coefs[comb->n];
comb->elts[i] = comb->elts[comb->n];
if (comb->rest)
{
gcc_assert (comb->n == MAX_AFF_ELTS - 1);
comb->coefs[comb->n] = 1;
comb->elts[comb->n] = comb->rest;
comb->rest = NULL_TREE;
comb->n++;
}
return;
}
if (comb->n < MAX_AFF_ELTS)
{
comb->coefs[comb->n] = scale;
comb->elts[comb->n] = elt;
comb->n++;
return;
}
if (scale == 1)
elt = fold_convert (comb->type, elt);
else
elt = fold_build2 (MULT_EXPR, comb->type,
fold_convert (comb->type, elt),
build_int_cst_type (comb->type, scale));
if (comb->rest)
comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
else
comb->rest = elt;
}
/* Adds COMB2 to COMB1. */
static void
aff_combination_add (struct affine_tree_combination *comb1,
struct affine_tree_combination *comb2)
{
unsigned i;
comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
for (i = 0; i < comb2->n; i++)
aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
if (comb2->rest)
aff_combination_add_elt (comb1, comb2->rest, 1);
}
/* Convert COMB to TYPE. */
static void
aff_combination_convert (tree type, struct affine_tree_combination *comb)
{
unsigned prec = TYPE_PRECISION (type);
unsigned i;
/* If the precision of both types is the same, it suffices to change the type
of the whole combination -- the elements are allowed to have another type
equivalent wrto STRIP_NOPS. */
if (prec == TYPE_PRECISION (comb->type))
{
comb->type = type;
return;
}
comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
comb->offset = comb->offset & comb->mask;
/* The type of the elements can be different from comb->type only as
much as what STRIP_NOPS would remove. We can just directly cast
to TYPE. */
for (i = 0; i < comb->n; i++)
comb->elts[i] = fold_convert (type, comb->elts[i]);
if (comb->rest)
comb->rest = fold_convert (type, comb->rest);
comb->type = type;
}
/* Splits EXPR into an affine combination of parts. */
static void
tree_to_aff_combination (tree expr, tree type,
struct affine_tree_combination *comb)
{
struct affine_tree_combination tmp;
enum tree_code code;
tree cst, core, toffset;
HOST_WIDE_INT bitpos, bitsize;
enum machine_mode mode;
int unsignedp, volatilep;
STRIP_NOPS (expr);
code = TREE_CODE (expr);
switch (code)
{
case INTEGER_CST:
aff_combination_const (comb, type, int_cst_value (expr));
return;
case PLUS_EXPR:
case MINUS_EXPR:
tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
if (code == MINUS_EXPR)
aff_combination_scale (&tmp, -1);
aff_combination_add (comb, &tmp);
return;
case MULT_EXPR:
cst = TREE_OPERAND (expr, 1);
if (TREE_CODE (cst) != INTEGER_CST)
break;
tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
aff_combination_scale (comb, int_cst_value (cst));
return;
case NEGATE_EXPR:
tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
aff_combination_scale (comb, -1);
return;
case ADDR_EXPR:
core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
&toffset, &mode, &unsignedp, &volatilep,
false);
if (bitpos % BITS_PER_UNIT != 0)
break;
aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
core = build_fold_addr_expr (core);
if (TREE_CODE (core) == ADDR_EXPR)
aff_combination_add_elt (comb, core, 1);
else
{
tree_to_aff_combination (core, type, &tmp);
aff_combination_add (comb, &tmp);
}
if (toffset)
{
tree_to_aff_combination (toffset, type, &tmp);
aff_combination_add (comb, &tmp);
}
return;
default:
break;
}
aff_combination_elt (comb, type, expr);
}
/* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for width of TYPE. */
static tree
add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
unsigned HOST_WIDE_INT mask)
{
enum tree_code code;
scale &= mask;
elt = fold_convert (type, elt);
if (scale == 1)
{
if (!expr)
return elt;
return fold_build2 (PLUS_EXPR, type, expr, elt);
}
if (scale == mask)
{
if (!expr)
return fold_build1 (NEGATE_EXPR, type, elt);
return fold_build2 (MINUS_EXPR, type, expr, elt);
}
if (!expr)
return fold_build2 (MULT_EXPR, type, elt,
build_int_cst_type (type, scale));
if ((scale | (mask >> 1)) == mask)
{
/* Scale is negative. */
code = MINUS_EXPR;
scale = (-scale) & mask;
}
else
code = PLUS_EXPR;
elt = fold_build2 (MULT_EXPR, type, elt,
build_int_cst_type (type, scale));
return fold_build2 (code, type, expr, elt);
}
/* Copies the tree elements of COMB to ensure that they are not shared. */
static void
unshare_aff_combination (struct affine_tree_combination *comb)
{
unsigned i;
for (i = 0; i < comb->n; i++)
comb->elts[i] = unshare_expr (comb->elts[i]);
if (comb->rest)
comb->rest = unshare_expr (comb->rest);
}
/* Makes tree from the affine combination COMB. */
static tree
aff_combination_to_tree (struct affine_tree_combination *comb)
{
tree type = comb->type;
tree expr = comb->rest;
unsigned i;
unsigned HOST_WIDE_INT off, sgn;
if (comb->n == 0 && comb->offset == 0)
{
if (expr)
{
/* Handle the special case produced by get_computation_aff when
the type does not fit in HOST_WIDE_INT. */
return fold_convert (type, expr);
}
else
return build_int_cst (type, 0);
}
gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
for (i = 0; i < comb->n; i++)
expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
comb->mask);
if ((comb->offset | (comb->mask >> 1)) == comb->mask)
{
/* Offset is negative. */
off = (-comb->offset) & comb->mask;
sgn = comb->mask;
}
else
{
off = comb->offset;
sgn = 1;
}
return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
comb->mask);
}
/* Folds EXPR using the affine expressions framework. */
static tree
@@ -3039,16 +2641,11 @@ get_computation_aff (struct loop *loop,
tree ubase = use->iv->base;
tree ustep = use->iv->step;
tree cbase = cand->iv->base;
tree cstep = cand->iv->step;
tree cstep = cand->iv->step, cstep_common;
tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
tree common_type;
tree common_type, var;
tree uutype;
tree expr, delta;
tree ratio;
unsigned HOST_WIDE_INT ustepi, cstepi;
HOST_WIDE_INT ratioi;
struct affine_tree_combination cbase_aff, expr_aff;
tree cstep_orig = cstep, ustep_orig = ustep;
aff_tree cbase_aff, var_aff;
double_int rat;
if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
@@ -3057,65 +2654,19 @@ get_computation_aff (struct loop *loop,
return false;
}
expr = var_at_stmt (loop, cand, at);
var = var_at_stmt (loop, cand, at);
uutype = unsigned_type_for (utype);
if (TREE_TYPE (expr) != ctype)
/* If the conversion is not noop, perform it. */
if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
{
/* This may happen with the original ivs. */
expr = fold_convert (ctype, expr);
}
if (TYPE_UNSIGNED (utype))
uutype = utype;
else
{
uutype = unsigned_type_for (utype);
ubase = fold_convert (uutype, ubase);
ustep = fold_convert (uutype, ustep);
}
if (uutype != ctype)
{
expr = fold_convert (uutype, expr);
cbase = fold_convert (uutype, cbase);
cstep = fold_convert (uutype, cstep);
/* If the conversion is not noop, we must take it into account when
considering the value of the step. */
if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
cstep_orig = cstep;
cbase = fold_convert (uutype, cbase);
var = fold_convert (uutype, var);
}
if (cst_and_fits_in_hwi (cstep_orig)
&& cst_and_fits_in_hwi (ustep_orig))
{
ustepi = int_cst_value (ustep_orig);
cstepi = int_cst_value (cstep_orig);
if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
{
/* TODO maybe consider case when ustep divides cstep and the ratio is
a power of 2 (so that the division is fast to execute)? We would
need to be much more careful with overflows etc. then. */
return false;
}
ratio = build_int_cst_type (uutype, ratioi);
}
else
{
if (!constant_multiple_of (ustep_orig, cstep_orig, &rat))
return false;
ratio = double_int_to_tree (uutype, rat);
/* Ratioi is only used to detect special cases when the multiplicative
factor is 1 or -1, so if rat does not fit to HOST_WIDE_INT, we may
set it to 0. */
if (double_int_fits_in_shwi_p (rat))
ratioi = double_int_to_shwi (rat);
else
ratioi = 0;
}
if (!constant_multiple_of (ustep, cstep, &rat))
return false;
/* In case both UBASE and CBASE are shortened to UUTYPE from some common
type, we achieve better folding by computing their difference in this
@@ -3124,73 +2675,32 @@ get_computation_aff (struct loop *loop,
anyway. */
common_type = determine_common_wider_type (&ubase, &cbase);
/* We may need to shift the value if we are after the increment. */
if (stmt_after_increment (loop, cand, at))
{
if (uutype != common_type)
cstep = fold_convert (common_type, cstep);
cbase = fold_build2 (PLUS_EXPR, common_type, cbase, cstep);
}
/* use = ubase - ratio * cbase + ratio * var.
In general case ubase + ratio * (var - cbase) could be better (one less
multiplication), but often it is possible to eliminate redundant parts
of computations from (ubase - ratio * cbase) term, and if it does not
happen, fold is able to apply the distributive law to obtain this form
anyway. */
if (TYPE_PRECISION (common_type) > HOST_BITS_PER_WIDE_INT)
{
/* Let's compute in trees and just return the result in AFF. This case
should not be very common, and fold itself is not that bad either,
so making the aff. functions more complicated to handle this case
is not that urgent. */
if (ratioi == 1)
{
delta = fold_build2 (MINUS_EXPR, common_type, ubase, cbase);
if (uutype != common_type)
delta = fold_convert (uutype, delta);
expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
}
else if (ratioi == -1)
{
delta = fold_build2 (PLUS_EXPR, common_type, ubase, cbase);
if (uutype != common_type)
delta = fold_convert (uutype, delta);
expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
}
else
{
delta = fold_build2 (MULT_EXPR, common_type, cbase, ratio);
delta = fold_build2 (MINUS_EXPR, common_type, ubase, delta);
if (uutype != common_type)
delta = fold_convert (uutype, delta);
expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
}
aff->type = uutype;
aff->n = 0;
aff->offset = 0;
aff->mask = 0;
aff->rest = expr;
return true;
}
/* If we got here, the types fits in HOST_WIDE_INT, thus it must be
possible to compute ratioi. */
gcc_assert (ratioi);
/* use = ubase - ratio * cbase + ratio * var. */
tree_to_aff_combination (ubase, common_type, aff);
tree_to_aff_combination (cbase, common_type, &cbase_aff);
tree_to_aff_combination (expr, uutype, &expr_aff);
aff_combination_scale (&cbase_aff, -ratioi);
aff_combination_scale (&expr_aff, ratioi);
tree_to_aff_combination (var, uutype, &var_aff);
/* We need to shift the value if we are after the increment. */
if (stmt_after_increment (loop, cand, at))
{
aff_tree cstep_aff;
if (common_type != uutype)
cstep_common = fold_convert (common_type, cstep);
else
cstep_common = cstep;
tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
aff_combination_add (&cbase_aff, &cstep_aff);
}
aff_combination_scale (&cbase_aff, double_int_neg (rat));
aff_combination_add (aff, &cbase_aff);
if (common_type != uutype)
aff_combination_convert (uutype, aff);
aff_combination_add (aff, &expr_aff);
aff_combination_convert (aff, uutype);
aff_combination_scale (&var_aff, rat);
aff_combination_add (aff, &var_aff);
return true;
}
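The rewritten get_computation_aff builds the use value from the identity stated in the old comment: with ustep = ratio * cstep, the use's value ubase + K * ustep equals (ubase - ratio * cbase) + ratio * var, where var = cbase + K * cstep is the candidate's value. A small numeric check of that identity (plain longs and illustrative values only):

/* Numeric check of the rewrite used by get_computation_aff:
   use = ubase - ratio * cbase + ratio * var.  */
#include <stdio.h>

int
main (void)
{
  long ubase = 100, ustep = 12;    /* use:       100 + 12*K */
  long cbase = 7, cstep = 4;       /* candidate:   7 +  4*K */
  long ratio = ustep / cstep;      /* constant_multiple_of would yield 3 */
  long k;

  for (k = 0; k < 5; k++)
    {
      long var = cbase + k * cstep;                  /* value of the candidate iv */
      long use = ubase + k * ustep;                  /* value to reproduce */
      long rewritten = (ubase - ratio * cbase) + ratio * var;

      printf ("%ld %ld\n", use, rewritten);          /* the two columns agree */
    }
  return 0;
}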
@@ -3202,7 +2712,7 @@ static tree
get_computation_at (struct loop *loop,
struct iv_use *use, struct iv_cand *cand, tree at)
{
struct affine_tree_combination aff;
aff_tree aff;
tree type = TREE_TYPE (use->iv->base);
if (!get_computation_aff (loop, use, cand, at, &aff))
@@ -3862,10 +3372,11 @@ get_computation_cost_at (struct ivopts_data *data,
tree ubase = use->iv->base, ustep = use->iv->step;
tree cbase, cstep;
tree utype = TREE_TYPE (ubase), ctype;
unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
unsigned HOST_WIDE_INT cstepi, offset = 0;
HOST_WIDE_INT ratio, aratio;
bool var_present, symbol_present;
unsigned cost = 0, n_sums;
double_int rat;
*depends_on = NULL;
@@ -3913,26 +3424,13 @@ get_computation_cost_at (struct ivopts_data *data,
else
cstepi = 0;
if (cst_and_fits_in_hwi (ustep)
&& cst_and_fits_in_hwi (cstep))
{
ustepi = int_cst_value (ustep);
if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
return INFTY;
}
else
{
double_int rat;
if (!constant_multiple_of (ustep, cstep, &rat))
return INFTY;
if (!constant_multiple_of (ustep, cstep, &rat))
return INFTY;
if (double_int_fits_in_shwi_p (rat))
ratio = double_int_to_shwi (rat);
else
return INFTY;
}
if (double_int_fits_in_shwi_p (rat))
ratio = double_int_to_shwi (rat);
else
return INFTY;
/* use = ubase + ratio * (var - cbase). If either cbase is a constant
or ratio == 1, it is better to handle this like
@@ -5427,7 +4925,10 @@ rewrite_use_nonlinear_expr (struct ivopts_data *data,
unshare_expr (step)));
}
else
comp = get_computation (data->current_loop, use, cand);
{
comp = get_computation (data->current_loop, use, cand);
gcc_assert (comp != NULL_TREE);
}
switch (TREE_CODE (use->stmt))
{
@@ -5593,11 +5094,13 @@ static void
rewrite_use_address (struct ivopts_data *data,
struct iv_use *use, struct iv_cand *cand)
{
struct affine_tree_combination aff;
aff_tree aff;
block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
tree ref;
bool ok;
get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
gcc_assert (ok);
unshare_aff_combination (&aff);
ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
@@ -5640,6 +5143,7 @@ rewrite_use_compare (struct ivopts_data *data,
/* The induction variable elimination failed; just express the original
giv. */
comp = get_computation (data->current_loop, use, cand);
gcc_assert (comp != NULL_TREE);
cond = *use->op_p;
op_p = &TREE_OPERAND (cond, 0);