mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-02-12 05:29:45 +08:00
common.opt: Add option ftree-vect-loop-version.
* common.opt: Add option ftree-vect-loop-version. * params.def: Add --param vect-max-version-checks. * doc/invoke.texi: Document ftree-vect-loop-version and --param vect-max-version-checks. * tree-vectorizer.h (_loop_vec_info): Add ptr_mask and may_misalign_stmts and defines for accessors. * tree-vectorizer.c : (new_loop_vec_info): VEC_alloc for LOOP_VINFO_MAY_MISALIGN_STMTS. (destroy_loop_vec_info): VEC_free for LOOP_VINFO_MAY_MISALIGN_STMTS. * tree-vect-analyze.c (vect_compute_data_ref_alignment): Update documentation. (vect_update_misalignment_for_peel): New. (vect_enhance_data_refs_alignment): Update to choose loop peeling or loop versioning if appropriate for the (potentially) unaligned data references in the loop. (vect_analyze_data_refs_alignment): Remove call to vect_enhance_data_refs_alignment so the checks can be done earlier. (vect_analyze_loop): Add call to vect_enhance_data_refs_alignment and move up call to vect_analyze_data_refs_alignment. * tree-vect-transform.c (vect_create_cond_for_align_checks): New. (vect_transform_loop): Add call to loop_version. From-SVN: r103941
This commit is contained in:
parent
4fe8db686f
commit
c12cc93047
@ -1,3 +1,29 @@
|
||||
2005-09-06 Keith Besaw <kbesaw@us.ibm.com>
|
||||
|
||||
* common.opt: Add option ftree-vect-loop-version.
|
||||
* params.def: Add --param vect-max-version-checks.
|
||||
* doc/invoke.texi: Document ftree-vect-loop-version and
|
||||
--param vect-max-version-checks.
|
||||
* tree-vectorizer.h (_loop_vec_info): Add ptr_mask and
|
||||
may_misalign_stmts and defines for accessors.
|
||||
* tree-vectorizer.c (new_loop_vec_info): VEC_alloc for
|
||||
LOOP_VINFO_MAY_MISALIGN_STMTS.
|
||||
(destroy_loop_vec_info): VEC_free for
|
||||
LOOP_VINFO_MAY_MISALIGN_STMTS.
|
||||
* tree-vect-analyze.c (vect_compute_data_ref_alignment):
|
||||
Update documentation.
|
||||
(vect_update_misalignment_for_peel): New.
|
||||
(vect_enhance_data_refs_alignment): Update to choose loop
|
||||
peeling or loop versioning if appropriate for the (potentially)
|
||||
unaligned data references in the loop.
|
||||
(vect_analyze_data_refs_alignment): Remove call to
|
||||
vect_enhance_data_refs_alignment so the checks can be done
|
||||
earlier.
|
||||
(vect_analyze_loop): Add call to vect_enhance_data_refs_alignment
|
||||
and move up call to vect_analyze_data_refs_alignment.
|
||||
* tree-vect-transform.c (vect_create_cond_for_align_checks): New.
|
||||
(vect_transform_loop): Add call to loop_version.
|
||||
|
||||
2005-09-06 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR rtl-optimization/23098
|
||||
|
@ -998,6 +998,10 @@ ftree-vectorize
|
||||
Common Report Var(flag_tree_vectorize)
|
||||
Enable loop vectorization on trees
|
||||
|
||||
ftree-vect-loop-version
|
||||
Common Report Var(flag_tree_vect_loop_version) Init(1)
|
||||
Enable loop versioning when doing loop vectorization on trees
|
||||
|
||||
ftree-vectorizer-verbose=
|
||||
Common RejectNegative Joined
|
||||
-ftree-vectorizer-verbose=<number> Set the verbosity level of the vectorizer
|
||||
|
@ -337,7 +337,7 @@ Objective-C and Objective-C++ Dialects}.
|
||||
-ftree-loop-linear -ftree-loop-im -ftree-loop-ivcanon -fivopts @gol
|
||||
-ftree-dominator-opts -ftree-dse -ftree-copyrename -ftree-sink @gol
|
||||
-ftree-ch -ftree-sra -ftree-ter -ftree-lrs -ftree-fre -ftree-vectorize @gol
|
||||
-ftree-salias -fweb @gol
|
||||
-ftree-vect-loop-version -ftree-salias -fweb @gol
|
||||
-ftree-copy-prop -ftree-store-ccp -ftree-store-copy-prop -fwhole-program @gol
|
||||
--param @var{name}=@var{value}
|
||||
-O -O0 -O1 -O2 -O3 -Os}
|
||||
@ -4391,7 +4391,8 @@ optimizations designed to reduce code size.
|
||||
|
||||
@option{-Os} disables the following optimization flags:
|
||||
@gccoptlist{-falign-functions -falign-jumps -falign-loops @gol
|
||||
-falign-labels -freorder-blocks -freorder-blocks-and-partition -fprefetch-loop-arrays}
|
||||
-falign-labels -freorder-blocks -freorder-blocks-and-partition @gol
|
||||
-fprefetch-loop-arrays -ftree-vect-loop-version}
|
||||
|
||||
If you use multiple @option{-O} options, with or without level numbers,
|
||||
the last such option is the one that is effective.
|
||||
@ -5038,6 +5039,15 @@ optimization later. This is enabled by default at @option{-O} and higher.
|
||||
@item -ftree-vectorize
|
||||
Perform loop vectorization on trees.
|
||||
|
||||
@item -ftree-vect-loop-version
|
||||
@opindex ftree-vect-loop-version
|
||||
Perform loop versioning when doing loop vectorization on trees. When a loop
|
||||
appears to be vectorizable except that data alignment or data dependence cannot
|
||||
be determined at compile time then vectorized and non-vectorized versions of
|
||||
the loop are generated along with runtime checks for alignment or dependence
|
||||
to control which version is executed. This option is enabled by default
|
||||
except at level @option{-Os} where it is disabled.
|
||||
|
||||
@item -ftree-vrp
|
||||
Perform Value Range Propagation on trees. This is similar to the
|
||||
constant propagation pass, but instead of values, ranges of values are
|
||||
@ -5883,6 +5893,11 @@ optimization when a new iv is added to the set.
|
||||
Bound on size of expressions used in the scalar evolutions analyzer.
|
||||
Large expressions slow the analyzer.
|
||||
|
||||
@item vect-max-version-checks
|
||||
The maximum number of runtime checks that can be performed when doing
|
||||
loop versioning in the vectorizer. See option @option{-ftree-vect-loop-version}
|
||||
for more information.
|
||||
|
||||
@item max-iterations-to-track
|
||||
|
||||
The maximum number of iterations of a loop the brute force algorithm
|
||||
|
@ -398,6 +398,11 @@ DEFPARAM(PARAM_SCEV_MAX_EXPR_SIZE,
|
||||
"Bound on size of expressions used in the scalar evolutions analyzer",
|
||||
20, 0, 0)
|
||||
|
||||
DEFPARAM(PARAM_VECT_MAX_VERSION_CHECKS,
|
||||
"vect-max-version-checks",
|
||||
"Bound on number of runtime checks inserted by the vectorizer's loop versioning",
|
||||
6, 0, 0)
|
||||
|
||||
/* The product of the next two is used to decide whether or not to
|
||||
use .GLOBAL_VAR. See tree-dfa.c. */
|
||||
DEFPARAM(PARAM_GLOBAL_VAR_THRESHOLD,
|
||||
|
@ -1,3 +1,19 @@
|
||||
2005-09-06 Keith Besaw <kbesaw@us.ibm.com>
|
||||
|
||||
* gcc.dg/vect/vect-29.c: Update xfail testing.
|
||||
* gcc.dg/vect/vect-44.c: Same.
|
||||
* gcc.dg/vect/vect-48.c: Same.
|
||||
* gcc.dg/vect/vect-50.c: Same.
|
||||
* gcc.dg/vect/vect-72.c: Same.
|
||||
* gcc.dg/vect/vect-77.c: Same.
|
||||
* gcc.dg/vect/vect-78.c: Same.
|
||||
* gcc.dg/vect/vect-80.c: Same.
|
||||
* gcc.dg/vect/vect-96.c: Same.
|
||||
* gcc.dg/vect/vect-100.c: Same.
|
||||
* gcc.dg/vect/vect.exp: Add default flags for Os-vect-*.
|
||||
* gfortran.dg/vect/vect-2.f90: Update xfail testing.
|
||||
* gfortran.dg/vect/vect-5.f90: Same.
|
||||
|
||||
2005-09-06 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* gcc.dg/debug/dwarf2/dwarf-char1.c: Accept more assembler comment
|
||||
|
@ -21,7 +21,7 @@ int main1 () {
|
||||
|
||||
p = (struct extraction *) malloc (sizeof (struct extraction));
|
||||
|
||||
/* Not vectorizable: p may alias a and/or b, since they are globals. */
|
||||
/* Vectorizable: alias analysis determines that p can't point to a and/or b. */
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
p->a[i] = a[i];
|
||||
@ -73,6 +73,6 @@ int main (void)
|
||||
}
|
||||
|
||||
/* Requires versioning. */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 0 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
|
@ -43,7 +43,11 @@ int main (void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
|
||||
/* For targets that don't support misaligned loads we version for the load.
|
||||
(The store is aligned). */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 1 "vect" {target vect_no_align } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
@ -51,7 +51,12 @@ int main (void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
|
||||
/* For targets that don't support misaligned loads we version for
|
||||
all three accesses (peeling to align the store will not force the
|
||||
two loads to be aligned). */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 3 "vect" { target vect_no_align } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
@ -52,7 +52,11 @@ int main (void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
|
||||
/* For targets that don't support misaligned loads we version for the two loads.
|
||||
(The store is aligned). */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 2 "vect" { target vect_no_align } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
@ -49,7 +49,12 @@ int main (void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
|
||||
/* For targets that don't support misaligned loads we version for
|
||||
all three accesses (peeling to align the store will not force the
|
||||
two loads to be aligned). */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 3 "vect" { target vect_no_align } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
@ -51,7 +51,11 @@ int main (void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
|
||||
/* For targets that don't support misaligned loads we version for the two loads.
|
||||
(The store is aligned). */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 2 "vect" { target vect_no_align } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
@ -43,7 +43,11 @@ int main (void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align } } } } */
|
||||
/* For targets that don't support misaligned loads we version for the load.
|
||||
(The store is aligned). */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail { vect_no_align } } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 1 "vect" { target vect_no_align } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
@ -44,7 +44,11 @@ int main (void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align } } } } */
|
||||
/* For targets that don't support misaligned loads we version for the load.
|
||||
(The store is aligned). */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail { vect_no_align } } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 1 "vect" { target vect_no_align } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
@ -43,7 +43,12 @@ int main (void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
|
||||
/* For targets that don't support misaligned loads we version for
|
||||
all three accesses (peeling to align the store will not force the
|
||||
two loads to be aligned). */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 3 "vect" { target vect_no_align } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
@ -37,7 +37,12 @@ int main (void)
|
||||
return main1 (8);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
|
||||
/* The store is unaligned, the load is aligned. For targets that support unaligned
|
||||
loads, peel to align the store and generate unaligned access for the loads.
|
||||
For targets that don't support unaligned loads, version for the store. */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 1 "vect" { target vect_no_align } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
@ -102,6 +102,11 @@ lappend DEFAULT_VECTCFLAGS "-ftrapv"
|
||||
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/trapv-vect*.\[cS\]]] \
|
||||
"" $DEFAULT_VECTCFLAGS
|
||||
|
||||
# With -Os
|
||||
lappend DEFAULT_VECTCFLAGS "-Os"
|
||||
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/Os-vect-*.\[cS\]]] \
|
||||
"" $DEFAULT_VECTCFLAGS
|
||||
|
||||
# Clean up.
|
||||
set dg-do-what-default ${save-dg-do-what-default}
|
||||
|
||||
|
@ -8,7 +8,15 @@ A = LOG(X); B = LOG(Y); C = A + B
|
||||
PRINT*, C(500000)
|
||||
END
|
||||
|
||||
! { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail vect_no_align } } }
|
||||
! First loop (A=LOG(X)) is vectorized using peeling to align the store.
|
||||
! Same for the second loop (B=LOG(Y)).
|
||||
! Third loop (C = A + B) is vectorized using versioning (for targets that don't
|
||||
! support unaligned loads) or using peeling to align the store (on targets that
|
||||
! support unaligned loads).
|
||||
|
||||
! { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } }
|
||||
! { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 3 "vect" { xfail vect_no_align } } }
|
||||
! { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { target vect_no_align } } }
|
||||
! { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail vect_no_align } } }
|
||||
! { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 3 "vect" {target vect_no_align } } }
|
||||
! { dg-final { cleanup-tree-dump "vect" } }
|
||||
|
@ -35,9 +35,10 @@
|
||||
stop
|
||||
end
|
||||
|
||||
! { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { xfail { vect_no_align || lp64 } } } }
|
||||
! { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { xfail { lp64 } } } }
|
||||
! { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { vect_no_align || lp64 } } } }
|
||||
! { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail { vect_no_align || lp64 } } } }
|
||||
! { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 3 "vect" {target vect_no_align } } }
|
||||
|
||||
! We also expect to vectorize one loop for lp64 targets that support
|
||||
! misaligned access:
|
||||
|
@ -33,6 +33,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
#include "cfgloop.h"
|
||||
#include "expr.h"
|
||||
#include "optabs.h"
|
||||
#include "params.h"
|
||||
#include "tree-chrec.h"
|
||||
#include "tree-data-ref.h"
|
||||
#include "tree-scalar-evolution.h"
|
||||
@ -47,7 +48,7 @@ static bool vect_analyze_data_ref_accesses (loop_vec_info);
|
||||
static bool vect_analyze_data_ref_dependences (loop_vec_info);
|
||||
static bool vect_analyze_data_refs_alignment (loop_vec_info);
|
||||
static bool vect_compute_data_refs_alignment (loop_vec_info);
|
||||
static void vect_enhance_data_refs_alignment (loop_vec_info);
|
||||
static bool vect_enhance_data_refs_alignment (loop_vec_info);
|
||||
static bool vect_analyze_operations (loop_vec_info);
|
||||
static bool vect_determine_vectorization_factor (loop_vec_info);
|
||||
|
||||
@ -61,6 +62,9 @@ static bool vect_analyze_data_ref_dependence
|
||||
static bool vect_compute_data_ref_alignment (struct data_reference *);
|
||||
static bool vect_analyze_data_ref_access (struct data_reference *);
|
||||
static bool vect_can_advance_ivs_p (loop_vec_info);
|
||||
static void vect_update_misalignment_for_peel
|
||||
(struct data_reference *, struct data_reference *, int npeel);
|
||||
|
||||
|
||||
/* Function vect_determine_vectorization_factor
|
||||
|
||||
@ -370,7 +374,8 @@ vect_analyze_operations (loop_vec_info loop_vinfo)
|
||||
}
|
||||
|
||||
if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
|
||||
|| LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
|
||||
|| LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
|
||||
|| LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "epilog loop required.");
|
||||
@ -791,11 +796,7 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
|
||||
/* Function vect_compute_data_refs_alignment
|
||||
|
||||
Compute the misalignment of data references in the loop.
|
||||
This pass may take place at function granularity instead of at loop
|
||||
granularity.
|
||||
|
||||
FOR NOW: No analysis is actually performed. Misalignment is calculated
|
||||
only for trivial cases. TODO. */
|
||||
Return FALSE if a data reference is found that cannot be vectorized. */
|
||||
|
||||
static bool
|
||||
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
|
||||
@ -814,6 +815,93 @@ vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
|
||||
}
|
||||
|
||||
|
||||
/* Function vect_update_misalignment_for_peel
|
||||
|
||||
Update DR_MISALIGNMENT (DR) to the value it has in the vector loop once
|
||||
iterations have been peeled to align DR_PEEL. DR is set to 0 when it
|
||||
shares DR_PEEL's misalignment, is advanced by NPEEL elements when both
|
||||
misalignments are known, and is set to -1 otherwise.
|
||||
|
||||
DR - the data reference whose misalignment is to be adjusted.
|
||||
DR_PEEL - the data reference whose misalignment is being made
|
||||
zero in the vector loop by the peel.
|
||||
NPEEL - the number of iterations in the peel loop if the misalignment
|
||||
of DR_PEEL is known at compile time. */
|
||||
|
||||
static void
|
||||
vect_update_misalignment_for_peel (struct data_reference *dr,
|
||||
struct data_reference *dr_peel, int npeel)
|
||||
{
|
||||
unsigned int i;
|
||||
int drsize;
|
||||
VEC(dr_p,heap) *same_align_drs;
|
||||
struct data_reference *current_dr;
|
||||
|
||||
/* Case 1: DR's misalignment is known and equals DR_PEEL's recorded value,
   so the peel that aligns DR_PEEL aligns DR as well.  */
if (known_alignment_for_access_p (dr)
|
||||
&& DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel))
|
||||
{
|
||||
DR_MISALIGNMENT (dr) = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
/* It can be assumed that the data refs with the same alignment as dr_peel
|
||||
are aligned in the vector loop. */
|
||||
/* Case 2: look DR up in the same-alignment list recorded on DR_PEEL's
   statement; a match means DR becomes aligned together with DR_PEEL.  */
same_align_drs
|
||||
= STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
|
||||
for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
|
||||
{
|
||||
if (current_dr != dr)
|
||||
continue;
|
||||
/* Sanity check: refs on the same-alignment list are expected to carry
   identical DR_MISALIGNMENT values.  */
gcc_assert (DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel));
|
||||
DR_MISALIGNMENT (dr) = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Case 3: both misalignments are known, so NPEEL is meaningful.  Advance
   DR by NPEEL elements of its own size and wrap at UNITS_PER_SIMD_WORD.  */
if (known_alignment_for_access_p (dr)
|
||||
&& known_alignment_for_access_p (dr_peel))
|
||||
{
|
||||
drsize = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
|
||||
DR_MISALIGNMENT (dr) += npeel * drsize;
|
||||
DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Fallback: nothing can be said about DR after the peel.
   NOTE(review): -1 presumably is the "unknown misalignment" marker tested
   by known_alignment_for_access_p -- confirm in tree-vectorizer.h.  */
DR_MISALIGNMENT (dr) = -1;
|
||||
}
|
||||
|
||||
|
||||
/* Function vect_verify_datarefs_alignment
|
||||
|
||||
Return TRUE if all data references in the loop can be
|
||||
handled with respect to alignment.
|
||||
|
||||
Each entry of LOOP_VINFO_DATAREFS (LOOP_VINFO) is queried with
|
||||
vect_supportable_dr_alignment. The first reference for which that query
|
||||
yields zero makes the function return FALSE (after an optional dump
|
||||
message); references that are supported but not dr_aligned only produce
|
||||
a dump note. */
|
||||
|
||||
static bool
|
||||
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
|
||||
{
|
||||
varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
|
||||
enum dr_alignment_support supportable_dr_alignment;
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
|
||||
{
|
||||
struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
|
||||
supportable_dr_alignment = vect_supportable_dr_alignment (dr);
|
||||
/* A zero result is treated as "this access cannot be vectorized with its
   current alignment": report load vs. store and give up on the loop.  */
if (!supportable_dr_alignment)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
|
||||
{
|
||||
if (DR_IS_READ (dr))
|
||||
fprintf (vect_dump,
|
||||
"not vectorized: unsupported unaligned load.");
|
||||
else
|
||||
fprintf (vect_dump,
|
||||
"not vectorized: unsupported unaligned store.");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
/* Supported, but by some scheme other than a plain aligned access: only
   note it in the dump (the testsuite scans for this exact message).  */
if (supportable_dr_alignment != dr_aligned
|
||||
&& vect_print_dump_info (REPORT_ALIGNMENT))
|
||||
fprintf (vect_dump, "Vectorizing an unaligned access.");
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/* Function vect_enhance_data_refs_alignment
|
||||
|
||||
This pass will use loop versioning and loop peeling in order to enhance
|
||||
@ -822,42 +910,30 @@ vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
|
||||
FOR NOW: we assume that whatever versioning/peeling takes place, only the
|
||||
original loop is to be vectorized; Any other loops that are created by
|
||||
the transformations performed in this pass - are not supposed to be
|
||||
vectorized. This restriction will be relaxed. */
|
||||
vectorized. This restriction will be relaxed.
|
||||
|
||||
static void
|
||||
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
|
||||
{
|
||||
varray_type loop_datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
|
||||
varray_type datarefs;
|
||||
VEC(dr_p,heap) *same_align_drs;
|
||||
struct data_reference *dr0 = NULL;
|
||||
struct data_reference *dr;
|
||||
unsigned int i, j;
|
||||
bool check_loads;
|
||||
This pass will require a cost model to guide it whether to apply peeling
|
||||
or versioning or a combination of the two. For example, the scheme that
|
||||
intel uses when given a loop with several memory accesses, is as follows:
|
||||
choose one memory access ('p') which alignment you want to force by doing
|
||||
peeling. Then, either (1) generate a loop in which 'p' is aligned and all
|
||||
other accesses are not necessarily aligned, or (2) use loop versioning to
|
||||
generate one loop in which all accesses are aligned, and another loop in
|
||||
which only 'p' is necessarily aligned.
|
||||
|
||||
/*
|
||||
This pass will require a cost model to guide it whether to apply peeling
|
||||
or versioning or a combination of the two. For example, the scheme that
|
||||
intel uses when given a loop with several memory accesses, is as follows:
|
||||
choose one memory access ('p') which alignment you want to force by doing
|
||||
peeling. Then, either (1) generate a loop in which 'p' is aligned and all
|
||||
other accesses are not necessarily aligned, or (2) use loop versioning to
|
||||
generate one loop in which all accesses are aligned, and another loop in
|
||||
which only 'p' is necessarily aligned.
|
||||
("Automatic Intra-Register Vectorization for the Intel Architecture",
|
||||
Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
|
||||
Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
|
||||
|
||||
("Automatic Intra-Register Vectorization for the Intel Architecture",
|
||||
Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
|
||||
Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
|
||||
Devising a cost model is the most critical aspect of this work. It will
|
||||
guide us on which access to peel for, whether to use loop versioning, how
|
||||
many versions to create, etc. The cost model will probably consist of
|
||||
generic considerations as well as target specific considerations (on
|
||||
powerpc for example, misaligned stores are more painful than misaligned
|
||||
loads).
|
||||
|
||||
Devising a cost model is the most critical aspect of this work. It will
|
||||
guide us on which access to peel for, whether to use loop versioning, how
|
||||
many versions to create, etc. The cost model will probably consist of
|
||||
generic considerations as well as target specific considerations (on
|
||||
powerpc for example, misaligned stores are more painful than misaligned
|
||||
loads).
|
||||
Here are the general steps involved in alignment enhancements:
|
||||
|
||||
Here is the general steps involved in alignment enhancements:
|
||||
|
||||
-- original loop, before alignment analysis:
|
||||
for (i=0; i<N; i++){
|
||||
x = q[i]; # DR_MISALIGNMENT(q) = unknown
|
||||
@ -876,14 +952,14 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
|
||||
x = q[i]; # DR_MISALIGNMENT(q) = 3
|
||||
p[i] = y; # DR_MISALIGNMENT(p) = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (i=0; i<N; i++){ # loop 1B
|
||||
x = q[i]; # DR_MISALIGNMENT(q) = 3
|
||||
p[i] = y; # DR_MISALIGNMENT(p) = unaligned
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
-- Possibility 2: we do loop peeling:
|
||||
for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
|
||||
x = q[i];
|
||||
@ -900,11 +976,11 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
|
||||
p[i] = y;
|
||||
}
|
||||
if (p is aligned) {
|
||||
for (i = 3; i<N; i++){ # loop 3A
|
||||
for (i = 3; i<N; i++){ # loop 3A
|
||||
x = q[i]; # DR_MISALIGNMENT(q) = 0
|
||||
p[i] = y; # DR_MISALIGNMENT(p) = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (i = 3; i<N; i++){ # loop 3B
|
||||
x = q[i]; # DR_MISALIGNMENT(q) = 0
|
||||
@ -912,11 +988,45 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
|
||||
}
|
||||
}
|
||||
|
||||
These loops are later passed to loop_transform to be vectorized. The
|
||||
vectorizer will use the alignment information to guide the transformation
|
||||
(whether to generate regular loads/stores, or with special handling for
|
||||
misalignment).
|
||||
*/
|
||||
These loops are later passed to loop_transform to be vectorized. The
|
||||
vectorizer will use the alignment information to guide the transformation
|
||||
(whether to generate regular loads/stores, or with special handling for
|
||||
misalignment). */
|
||||
|
||||
static bool
|
||||
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
|
||||
{
|
||||
varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
|
||||
enum dr_alignment_support supportable_dr_alignment;
|
||||
struct data_reference *dr0 = NULL;
|
||||
struct data_reference *dr;
|
||||
unsigned int i;
|
||||
bool do_peeling = false;
|
||||
bool do_versioning = false;
|
||||
bool stat;
|
||||
|
||||
/* While cost model enhancements are expected in the future, the high level
|
||||
view of the code at this time is as follows:
|
||||
|
||||
A) If there is a misaligned write then see if peeling to align this write
|
||||
can make all data references satisfy vect_supportable_dr_alignment.
|
||||
If so, update data structures as needed and return true. Note that
|
||||
at this time vect_supportable_dr_alignment is known to return false
|
||||
for a misaligned write.
|
||||
|
||||
B) If peeling wasn't possible and there is a data reference with an
|
||||
unknown misalignment that does not satisfy vect_supportable_dr_alignment
|
||||
then see if loop versioning checks can be used to make all data
|
||||
references satisfy vect_supportable_dr_alignment. If so, update
|
||||
data structures as needed and return true.
|
||||
|
||||
C) If neither peeling nor versioning were successful then return false if
|
||||
any data reference does not satisfy vect_supportable_dr_alignment.
|
||||
|
||||
D) Return true (all data references satisfy vect_supportable_dr_alignment).
|
||||
|
||||
Note, Possibility 3 above (which is peeling and versioning together) is not
|
||||
being done at this time. */
|
||||
|
||||
/* (1) Peeling to force alignment. */
|
||||
|
||||
@ -932,106 +1042,210 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
|
||||
misaligned store in the loop.
|
||||
Rationale: misaligned stores are not yet supported.
|
||||
|
||||
TODO: Use a better cost model. */
|
||||
TODO: Use a cost model. */
|
||||
|
||||
for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_datarefs); i++)
|
||||
for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
|
||||
{
|
||||
dr0 = VARRAY_GENERIC_PTR (loop_datarefs, i);
|
||||
if (!DR_IS_READ (dr0) && !aligned_access_p (dr0))
|
||||
{
|
||||
LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
|
||||
LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
|
||||
break;
|
||||
}
|
||||
dr = VARRAY_GENERIC_PTR (datarefs, i);
|
||||
if (!DR_IS_READ (dr) && !aligned_access_p (dr))
|
||||
{
|
||||
dr0 = dr;
|
||||
do_peeling = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* (1.2) Update the alignment info according to the peeling factor.
|
||||
If the misalignment of the DR we peel for is M, then the
|
||||
peeling factor is VF - M, and the misalignment of each access DR_i
|
||||
in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
|
||||
If the misalignment of the DR we peel for is unknown, then the
|
||||
misalignment of each access DR_i in the loop is also unknown.
|
||||
/* Often peeling for alignment will require peeling for loop-bound, which in
|
||||
turn requires that we know how to adjust the loop ivs after the loop. */
|
||||
if (!vect_can_advance_ivs_p (loop_vinfo))
|
||||
do_peeling = false;
|
||||
|
||||
TODO: - consider accesses that are known to have the same
|
||||
alignment, even if that alignment is unknown. */
|
||||
|
||||
if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
|
||||
if (do_peeling)
|
||||
{
|
||||
int mis;
|
||||
int npeel = 0;
|
||||
|
||||
if (known_alignment_for_access_p (dr0))
|
||||
{
|
||||
/* Since it's known at compile time, compute the number of iterations
|
||||
in the peeled loop (the peeling factor) for use in updating
|
||||
DR_MISALIGNMENT values. The peeling factor is the vectorization
|
||||
factor minus the misalignment as an element count. */
|
||||
mis = DR_MISALIGNMENT (dr0);
|
||||
mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
|
||||
npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
|
||||
}
|
||||
|
||||
datarefs = loop_datarefs;
|
||||
check_loads = false;
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
|
||||
{
|
||||
struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
|
||||
|
||||
if (dr == dr0 || (!check_loads && DR_IS_READ (dr)))
|
||||
continue;
|
||||
if (known_alignment_for_access_p (dr)
|
||||
&& DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr0))
|
||||
DR_MISALIGNMENT (dr) = 0;
|
||||
else if (known_alignment_for_access_p (dr)
|
||||
&& known_alignment_for_access_p (dr0))
|
||||
{
|
||||
int drsize =
|
||||
GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
|
||||
|
||||
DR_MISALIGNMENT (dr) += npeel * drsize;
|
||||
DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
|
||||
}
|
||||
else
|
||||
DR_MISALIGNMENT (dr) = -1;
|
||||
}
|
||||
check_loads = true;
|
||||
}
|
||||
|
||||
same_align_drs =
|
||||
STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr0)));
|
||||
for (i = 0; VEC_iterate (dr_p, same_align_drs, i, dr); i++)
|
||||
{
|
||||
DR_MISALIGNMENT (dr) = 0;
|
||||
/* Since it's known at compile time, compute the number of iterations
|
||||
in the peeled loop (the peeling factor) for use in updating
|
||||
DR_MISALIGNMENT values. The peeling factor is the vectorization
|
||||
factor minus the misalignment as an element count. */
|
||||
mis = DR_MISALIGNMENT (dr0);
|
||||
mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
|
||||
npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
|
||||
}
|
||||
|
||||
DR_MISALIGNMENT (dr0) = 0;
|
||||
/* Ensure that all data refs can be vectorized after the peel. */
|
||||
for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
|
||||
{
|
||||
int save_misalignment;
|
||||
|
||||
dr = VARRAY_GENERIC_PTR (datarefs, i);
|
||||
if (dr == dr0)
|
||||
continue;
|
||||
save_misalignment = DR_MISALIGNMENT (dr);
|
||||
vect_update_misalignment_for_peel (dr, dr0, npeel);
|
||||
supportable_dr_alignment = vect_supportable_dr_alignment (dr);
|
||||
DR_MISALIGNMENT (dr) = save_misalignment;
|
||||
|
||||
if (!supportable_dr_alignment)
|
||||
{
|
||||
do_peeling = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (do_peeling)
|
||||
{
|
||||
/* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
|
||||
If the misalignment of DR_i is identical to that of dr0 then set
|
||||
DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
|
||||
dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
|
||||
by the peeling factor times the element size of DR_i (MOD the
|
||||
vectorization factor times the size). Otherwise, the
|
||||
misalignment of DR_i must be set to unknown. */
|
||||
for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
|
||||
{
|
||||
dr = VARRAY_GENERIC_PTR (datarefs, i);
|
||||
if (dr == dr0)
|
||||
continue;
|
||||
vect_update_misalignment_for_peel (dr, dr0, npeel);
|
||||
}
|
||||
|
||||
LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
|
||||
LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
|
||||
DR_MISALIGNMENT (dr0) = 0;
|
||||
if (vect_print_dump_info (REPORT_ALIGNMENT))
|
||||
fprintf (vect_dump, "Alignment of access forced using peeling.");
|
||||
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "Peeling for alignment will be applied.");
|
||||
|
||||
stat = vect_verify_datarefs_alignment (loop_vinfo);
|
||||
gcc_assert (stat);
|
||||
return stat;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* (2) Versioning to force alignment. */
|
||||
|
||||
/* Try versioning if:
|
||||
1) flag_tree_vect_loop_version is TRUE
|
||||
2) optimize_size is FALSE
|
||||
3) there is at least one unsupported misaligned data ref with an unknown
|
||||
misalignment, and
|
||||
4) all misaligned data refs with a known misalignment are supported, and
|
||||
5) the number of runtime alignment checks is within reason. */
|
||||
|
||||
do_versioning = flag_tree_vect_loop_version && (!optimize_size);
|
||||
|
||||
if (do_versioning)
|
||||
{
|
||||
for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
|
||||
{
|
||||
dr = VARRAY_GENERIC_PTR (datarefs, i);
|
||||
|
||||
if (aligned_access_p (dr))
|
||||
continue;
|
||||
|
||||
supportable_dr_alignment = vect_supportable_dr_alignment (dr);
|
||||
|
||||
if (!supportable_dr_alignment)
|
||||
{
|
||||
tree stmt;
|
||||
int mask;
|
||||
tree vectype;
|
||||
|
||||
if (known_alignment_for_access_p (dr)
|
||||
|| VEC_length (tree,
|
||||
LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
|
||||
>= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS))
|
||||
{
|
||||
do_versioning = false;
|
||||
break;
|
||||
}
|
||||
|
||||
stmt = DR_STMT (dr);
|
||||
vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
|
||||
gcc_assert (vectype);
|
||||
|
||||
/* The rightmost bits of an aligned address must be zeros.
|
||||
Construct the mask needed for this test. For example,
|
||||
GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
|
||||
mask must be 15 = 0xf. */
|
||||
mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
|
||||
|
||||
/* FORNOW: use the same mask to test all potentially unaligned
|
||||
references in the loop. The vectorizer currently supports
|
||||
a single vector size, see the reference to
|
||||
GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
|
||||
vectorization factor is computed. */
|
||||
gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
|
||||
|| LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
|
||||
LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
|
||||
VEC_safe_push (tree, heap,
|
||||
LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
|
||||
DR_STMT (dr));
|
||||
}
|
||||
}
|
||||
|
||||
/* Versioning requires at least one misaligned data reference. */
|
||||
if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
|
||||
do_versioning = false;
|
||||
else if (!do_versioning)
|
||||
VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
|
||||
}
|
||||
|
||||
if (do_versioning)
|
||||
{
|
||||
VEC(tree,heap) *may_misalign_stmts
|
||||
= LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
|
||||
tree stmt;
|
||||
|
||||
/* It can now be assumed that the data references in the statements
|
||||
in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
|
||||
of the loop being vectorized. */
|
||||
for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
|
||||
{
|
||||
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
|
||||
dr = STMT_VINFO_DATA_REF (stmt_info);
|
||||
DR_MISALIGNMENT (dr) = 0;
|
||||
if (vect_print_dump_info (REPORT_ALIGNMENT))
|
||||
fprintf (vect_dump, "Alignment of access forced using versioning.");
|
||||
}
|
||||
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "Versioning for alignment will be applied.");
|
||||
|
||||
/* Peeling and versioning can't be done together at this time. */
|
||||
gcc_assert (! (do_peeling && do_versioning));
|
||||
|
||||
stat = vect_verify_datarefs_alignment (loop_vinfo);
|
||||
gcc_assert (stat);
|
||||
return stat;
|
||||
}
|
||||
|
||||
/* This point is reached if neither peeling nor versioning is being done. */
|
||||
gcc_assert (! (do_peeling || do_versioning));
|
||||
|
||||
stat = vect_verify_datarefs_alignment (loop_vinfo);
|
||||
return stat;
|
||||
}
|
||||
|
||||
|
||||
/* Function vect_analyze_data_refs_alignment
|
||||
|
||||
Analyze the alignment of the data-references in the loop.
|
||||
FOR NOW: Until support for misaligned accesses is in place, only if all
|
||||
accesses are aligned can the loop be vectorized. This restriction will be
|
||||
relaxed. */
|
||||
Return FALSE if a data reference is found that cannot be vectorized. */
|
||||
|
||||
static bool
|
||||
vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
|
||||
{
|
||||
varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
|
||||
enum dr_alignment_support supportable_dr_alignment;
|
||||
unsigned int i;
|
||||
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
|
||||
|
||||
|
||||
/* This pass may take place at function granularity instead of at loop
|
||||
granularity. */
|
||||
|
||||
if (!vect_compute_data_refs_alignment (loop_vinfo))
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
|
||||
@ -1040,40 +1254,6 @@ vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/* This pass will decide on using loop versioning and/or loop peeling in
|
||||
order to enhance the alignment of data references in the loop. */
|
||||
|
||||
vect_enhance_data_refs_alignment (loop_vinfo);
|
||||
|
||||
|
||||
/* Finally, check that all the data references in the loop can be
|
||||
handled with respect to their alignment. */
|
||||
|
||||
for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
|
||||
{
|
||||
struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
|
||||
supportable_dr_alignment = vect_supportable_dr_alignment (dr);
|
||||
if (!supportable_dr_alignment)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
|
||||
{
|
||||
if (DR_IS_READ (dr))
|
||||
fprintf (vect_dump,
|
||||
"not vectorized: unsupported unaligned load.");
|
||||
else
|
||||
fprintf (vect_dump,
|
||||
"not vectorized: unsupported unaligned store.");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
if (supportable_dr_alignment != dr_aligned
|
||||
&& (vect_print_dump_info (REPORT_ALIGNMENT)))
|
||||
fprintf (vect_dump, "Vectorizing an unaligned access.");
|
||||
}
|
||||
if (LOOP_VINFO_UNALIGNED_DR (loop_vinfo)
|
||||
&& vect_print_dump_info (REPORT_ALIGNMENT))
|
||||
fprintf (vect_dump, "Alignment of access forced using peeling.");
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1081,7 +1261,7 @@ vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
|
||||
/* Function vect_analyze_data_ref_access.
|
||||
|
||||
Analyze the access pattern of the data-reference DR. For now, a data access
|
||||
has to consecutive to be considered vectorizable. */
|
||||
has to be consecutive to be considered vectorizable. */
|
||||
|
||||
static bool
|
||||
vect_analyze_data_ref_access (struct data_reference *dr)
|
||||
@ -1524,7 +1704,7 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
|
||||
|
||||
/* Function vect_can_advance_ivs_p
|
||||
|
||||
In case the number of iterations that LOOP iterates in unknown at compile
|
||||
In case the number of iterations that LOOP iterates is unknown at compile
|
||||
time, an epilog loop will be generated, and the loop induction variables
|
||||
(IVs) will be "advanced" to the value they are supposed to take just before
|
||||
the epilog loop. Here we check that the access function of the loop IVs
|
||||
@ -1827,6 +2007,18 @@ vect_analyze_loop (struct loop *loop)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Analyze the alignment of the data-refs in the loop.
|
||||
Fail if a data reference is found that cannot be vectorized. */
|
||||
|
||||
ok = vect_analyze_data_refs_alignment (loop_vinfo);
|
||||
if (!ok)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "bad data alignment.");
|
||||
destroy_loop_vec_info (loop_vinfo);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ok = vect_determine_vectorization_factor (loop_vinfo);
|
||||
if (!ok)
|
||||
{
|
||||
@ -1860,10 +2052,10 @@ vect_analyze_loop (struct loop *loop)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Analyze the alignment of the data-refs in the loop.
|
||||
FORNOW: Only aligned accesses are handled. */
|
||||
/* This pass will decide on using loop versioning and/or loop peeling in
|
||||
order to enhance the alignment of data references in the loop. */
|
||||
|
||||
ok = vect_analyze_data_refs_alignment (loop_vinfo);
|
||||
ok = vect_enhance_data_refs_alignment (loop_vinfo);
|
||||
if (!ok)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
|
@ -336,7 +336,7 @@ vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
{
|
||||
tree data_ref_base = base_name;
|
||||
fprintf (vect_dump, "create array_ref of type: ");
|
||||
fprintf (vect_dump, "create vector-pointer variable to type: ");
|
||||
print_generic_expr (vect_dump, vectype, TDF_SLIM);
|
||||
if (TREE_CODE (data_ref_base) == VAR_DECL)
|
||||
fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
|
||||
@ -2697,6 +2697,128 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
|
||||
}
|
||||
|
||||
|
||||
/* Function vect_create_cond_for_align_checks.
|
||||
|
||||
Create a conditional expression that represents the alignment checks for
|
||||
all of the data references (array element references) whose alignment must be
|
||||
checked at runtime.
|
||||
|
||||
Input:
|
||||
LOOP_VINFO - two fields of the loop information are used.
|
||||
LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
|
||||
LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
|
||||
|
||||
Output:
|
||||
COND_EXPR_STMT_LIST - statements needed to construct the conditional
|
||||
expression.
|
||||
The returned value is the conditional expression to be used in the if
|
||||
statement that controls which version of the loop gets executed at runtime.
|
||||
|
||||
The algorithm makes two assumptions:
|
||||
1) The number of bytes "n" in a vector is a power of 2.
|
||||
2) An address "a" is aligned if a%n is zero, and this
|
||||
test can be done as (a&(n-1)) == 0. For example, for 16
|
||||
byte vectors the test is (a&0xf) == 0. */
|
||||
|
||||
static tree
|
||||
vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
|
||||
tree *cond_expr_stmt_list)
|
||||
{
|
||||
VEC(tree,heap) *may_misalign_stmts
|
||||
= LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
|
||||
tree ref_stmt;
|
||||
int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
|
||||
tree mask_cst;
|
||||
unsigned int i;
|
||||
tree psize;
|
||||
tree int_ptrsize_type;
|
||||
char tmp_name[20]; /* Scratch buffer for the "addr2int<i>" and "orptrs<i>" temporary names. */
|
||||
tree or_tmp_name = NULL_TREE; /* Running OR of the addresses; NULL_TREE until the first one is seen. */
|
||||
tree and_tmp, and_tmp_name, and_stmt;
|
||||
tree ptrsize_zero;
|
||||
|
||||
/* Check that mask is one less than a power of 2, i.e., mask is
|
||||
all zeros followed by all ones. */
|
||||
gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
|
||||
|
||||
/* CHECKME: what is the best integer or unsigned type to use to hold a
|
||||
cast from a pointer value? */
|
||||
psize = TYPE_SIZE (ptr_type_node);
|
||||
int_ptrsize_type
|
||||
= lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
|
||||
|
||||
/* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
|
||||
of the first vector of the i'th data reference; the OR is bitwise. */
|
||||
|
||||
for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
|
||||
{
|
||||
tree new_stmt_list = NULL_TREE;
|
||||
tree addr_base;
|
||||
tree addr_tmp, addr_tmp_name, addr_stmt;
|
||||
tree or_tmp, new_or_tmp_name, or_stmt;
|
||||
|
||||
/* create: addr_tmp = (int_ptrsize_type)(address_of_first_vector) */
|
||||
addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
|
||||
&new_stmt_list,
|
||||
NULL_TREE);
|
||||
|
||||
if (new_stmt_list != NULL_TREE)
|
||||
append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list); /* Statements the helper placed in new_stmt_list go first. */
|
||||
|
||||
sprintf (tmp_name, "%s%d", "addr2int", i);
|
||||
addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
|
||||
add_referenced_tmp_var (addr_tmp);
|
||||
addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
|
||||
addr_stmt = fold_convert (int_ptrsize_type, addr_base);
|
||||
addr_stmt = build2 (MODIFY_EXPR, void_type_node,
|
||||
addr_tmp_name, addr_stmt);
|
||||
SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
|
||||
append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);
|
||||
|
||||
/* The addresses are ORed together (bitwise). */
|
||||
|
||||
if (or_tmp_name != NULL_TREE)
|
||||
{
|
||||
/* create: new_or_tmp = or_tmp | addr_tmp */
|
||||
sprintf (tmp_name, "%s%d", "orptrs", i);
|
||||
or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
|
||||
add_referenced_tmp_var (or_tmp);
|
||||
new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
|
||||
or_stmt = build2 (MODIFY_EXPR, void_type_node, new_or_tmp_name,
|
||||
build2 (BIT_IOR_EXPR, int_ptrsize_type,
|
||||
or_tmp_name,
|
||||
addr_tmp_name));
|
||||
SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
|
||||
append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
|
||||
or_tmp_name = new_or_tmp_name;
|
||||
}
|
||||
else
|
||||
or_tmp_name = addr_tmp_name; /* First address: it seeds the OR chain. */
|
||||
|
||||
} /* end for i */
|
||||
|
||||
mask_cst = build_int_cst (int_ptrsize_type, mask);
|
||||
|
||||
/* create: and_tmp = or_tmp & mask */
|
||||
and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
|
||||
add_referenced_tmp_var (and_tmp);
|
||||
and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);
|
||||
|
||||
and_stmt = build2 (MODIFY_EXPR, void_type_node,
|
||||
and_tmp_name,
|
||||
build2 (BIT_AND_EXPR, int_ptrsize_type,
|
||||
or_tmp_name, mask_cst));
|
||||
SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
|
||||
append_to_statement_list_force (and_stmt, cond_expr_stmt_list);
|
||||
|
||||
/* Make and_tmp the left operand of the conditional test against zero.
|
||||
If and_tmp has a non-zero bit then some address is unaligned. */
|
||||
ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
|
||||
return build2 (EQ_EXPR, boolean_type_node,
|
||||
and_tmp_name, ptrsize_zero);
|
||||
}
|
||||
|
||||
|
||||
/* Function vect_transform_loop.
|
||||
|
||||
The analysis phase has determined that the loop is vectorizable.
|
||||
@ -2720,6 +2842,30 @@ vect_transform_loop (loop_vec_info loop_vinfo,
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "=== vec_transform_loop ===");
|
||||
|
||||
/* If the loop has data references that may or may not be aligned then
|
||||
two versions of the loop need to be generated, one which is vectorized
|
||||
and one which isn't. A test is then generated to control which of the
|
||||
loops is executed. The test checks for the alignment of all of the
|
||||
data references that may or may not be aligned. */
|
||||
|
||||
if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
|
||||
{
|
||||
struct loop *nloop;
|
||||
tree cond_expr;
|
||||
tree cond_expr_stmt_list = NULL_TREE;
|
||||
basic_block condition_bb;
|
||||
block_stmt_iterator cond_exp_bsi;
|
||||
|
||||
cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
|
||||
&cond_expr_stmt_list);
|
||||
initialize_original_copy_tables ();
|
||||
nloop = loop_version (loops, loop, cond_expr, &condition_bb, true);
|
||||
free_original_copy_tables();
|
||||
update_ssa (TODO_update_ssa);
|
||||
cond_exp_bsi = bsi_last (condition_bb);
|
||||
bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
|
||||
}
|
||||
|
||||
/* CHECKME: we wouldn't need this if we called update_ssa once
|
||||
for all loops. */
|
||||
bitmap_zero (vect_vnames_to_rename);
|
||||
|
@ -137,6 +137,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
#include "cfglayout.h"
|
||||
#include "expr.h"
|
||||
#include "optabs.h"
|
||||
#include "params.h"
|
||||
#include "toplev.h"
|
||||
#include "tree-chrec.h"
|
||||
#include "tree-data-ref.h"
|
||||
@ -1420,6 +1421,8 @@ new_loop_vec_info (struct loop *loop)
|
||||
VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREFS (res), 20, "loop_datarefs");
|
||||
VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DDRS (res), 20, "loop_ddrs");
|
||||
LOOP_VINFO_UNALIGNED_DR (res) = NULL;
|
||||
LOOP_VINFO_MAY_MISALIGN_STMTS (res)
|
||||
= VEC_alloc (tree, heap, PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS));
|
||||
|
||||
return res;
|
||||
}
|
||||
@ -1480,6 +1483,7 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo)
|
||||
free (LOOP_VINFO_BBS (loop_vinfo));
|
||||
varray_clear (LOOP_VINFO_DATAREFS (loop_vinfo));
|
||||
varray_clear (LOOP_VINFO_DDRS (loop_vinfo));
|
||||
VEC_free (tree, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
|
||||
|
||||
free (loop_vinfo);
|
||||
}
|
||||
@ -2049,7 +2053,7 @@ vectorize_loops (struct loops *loops)
|
||||
if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
|
||||
continue;
|
||||
|
||||
vect_transform_loop (loop_vinfo, loops);
|
||||
vect_transform_loop (loop_vinfo, loops);
|
||||
num_vectorized_loops++;
|
||||
}
|
||||
|
||||
|
@ -117,28 +117,37 @@ typedef struct _loop_vec_info {
|
||||
unaligned_dr. */
|
||||
int peeling_for_alignment;
|
||||
|
||||
/* The mask used to check the alignment of pointers or arrays. */
|
||||
int ptr_mask;
|
||||
|
||||
/* All data references in the loop. */
|
||||
varray_type datarefs;
|
||||
|
||||
/* All data dependences in the loop. */
|
||||
varray_type ddrs;
|
||||
|
||||
/* Statements in the loop that have data references that are candidates for a
|
||||
runtime (loop versioning) misalignment check. */
|
||||
VEC(tree,heap) *may_misalign_stmts;
|
||||
|
||||
/* The loop location in the source. */
|
||||
LOC loop_line_number;
|
||||
} *loop_vec_info;
|
||||
|
||||
/* Access Functions. */
|
||||
/* Access Functions. */
|
||||
#define LOOP_VINFO_LOOP(L) (L)->loop
|
||||
#define LOOP_VINFO_BBS(L) (L)->bbs
|
||||
#define LOOP_VINFO_EXIT_COND(L) (L)->exit_cond
|
||||
#define LOOP_VINFO_NITERS(L) (L)->num_iters
|
||||
#define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable
|
||||
#define LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor
|
||||
#define LOOP_VINFO_PTR_MASK(L) (L)->ptr_mask
|
||||
#define LOOP_VINFO_DATAREFS(L) (L)->datarefs
|
||||
#define LOOP_VINFO_DDRS(L) (L)->ddrs
|
||||
#define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters))
|
||||
#define LOOP_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment
|
||||
#define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr
|
||||
#define LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts
|
||||
#define LOOP_VINFO_LOC(L) (L)->loop_line_number
|
||||
|
||||
#define LOOP_VINFO_NITERS_KNOWN_P(L) \
|
||||
|
Loading…
Reference in New Issue
Block a user