From d5e3c14d8c5e3f0f374fd103557f41b87ca402a6 Mon Sep 17 00:00:00 2001 From: Sameera Deshpande Date: Thu, 25 Jan 2018 12:09:30 +0000 Subject: [PATCH] Add target specific tile generation algorithm From-SVN: r257048 --- gcc/ChangeLog | 24 + gcc/Makefile.in | 23 +- gcc/config/mips/mips.h | 33 + gcc/coretypes.h | 2 + gcc/genvect-inst-tiles.c | 2043 ++++++++++++++++++++++++++++++++ gcc/tree-vect-unified-common.c | 17 +- gcc/tree-vect-unified-opts.c | 77 +- gcc/tree-vect-unified.c | 202 +++- gcc/tree-vect-unified.h | 67 +- gcc/vec.h | 18 +- 10 files changed, 2452 insertions(+), 54 deletions(-) create mode 100644 gcc/genvect-inst-tiles.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a1829aca8564..b73eb164aefd 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,27 @@ +2017-02-13 Sameera Deshpande + + * Makefile.in: Add tree-vect-unified-opts.o in OBJS. + * tree-vect-unified-opts.c (ILV_arity_reduction): New function. + (EXTR_arity_reduction): Likewise + (k_arity_reduction): Likewise + (ILV_arity_promotion): Likewise + (merge_EXTR_nodes): Likewise + (k_arity_promotion): Likewise + (k_arity_promotion_reduction): Likewise + * tree-vect-unified.c (tree_code_name): Move up in the file. + * tree-vect-unified.h (init_stmt_attr_vec): Move to... + * tree-vect-unified.c (init_stmt_attr_vec): ...here. + * tree-vect-unified.h (free_stmt_attr_vec): Move to... + * tree-vect-unified.c (free_stmt_attr_vec): ...here + * tree-vect-unified.h (set_stmt_attr): Move to... + * tree-vect-unified.c (set_stmt_attr): ...here + * tree-vect-unified.h (get_stmt_attr): Move to... + * tree-vect-unified.c (get_stmt_attr): ...here + (populate_prim_node): Change 2nd argument. + (duplicate_prim_node): New function + (print_primtree): Likewise. + (dump_primtree_node): Likewise. + 2016-07-08 Sameera Deshpande * Makefile.in: Add tree-vect-unified.o in OBJS. 
diff --git a/gcc/Makefile.in b/gcc/Makefile.in index b399008f31d8..3d23be6b4fc2 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1067,7 +1067,12 @@ BUILD_RTL = build/rtl.o build/read-rtl.o build/ggc-none.o \ build/print-rtl.o build/hash-table.o BUILD_MD = build/read-md.o BUILD_ERRORS = build/errors.o +BUILD_UNITED = build/vec.o build/hash-table.o build/errors.o \ + build/ggc-none.o \ + build/tree-vect-unified-common.o build/tree-vect-unified-opts.o +build/tree-vect-unified-common.o : tree-vect-unified-common.c gtype-desc.h insn-codes.h +build/tree-vect-unified-opts.o : tree-vect-unified-opts.c gtype-desc.h insn-codes.h # Specify the directories to be searched for header files. # Both . and srcdir are used, in that order, # so that *config.h will be found in the compilation @@ -1206,6 +1211,7 @@ OBJS = \ insn-preds.o \ insn-recog.o \ insn-enums.o \ + insn-vect-inst-tiles.o \ ggc-page.o \ alias.o \ alloc-pool.o \ @@ -2207,7 +2213,7 @@ $(common_out_object_file): $(common_out_file) insn-emit.c insn-recog.c insn-extract.c insn-output.c insn-peep.c \ insn-attr.h insn-attr-common.h insn-attrtab.c insn-dfatab.c \ insn-latencytab.c insn-preds.c gimple-match.c generic-match.c \ - insn-target-def.h + insn-target-def.h insn-vect-inst-tiles.c # Dependencies for the md file. 
The first time through, we just assume # the md file itself and the generated dependency file (in order to get @@ -2232,9 +2238,10 @@ simple_rtl_generated_h = insn-attr.h insn-attr-common.h insn-codes.h \ simple_rtl_generated_c = insn-automata.c insn-emit.c \ insn-extract.c insn-output.c \ - insn-peep.c insn-recog.c + insn-peep.c insn-recog.c \ + insn-vect-inst-tiles.c -simple_generated_h = $(simple_rtl_generated_h) insn-constants.h +simple_generated_h = $(simple_rtl_generated_h) insn-constants.h simple_generated_c = $(simple_rtl_generated_c) insn-enums.c @@ -2602,6 +2609,8 @@ build/read-rtl.o: read-rtl.c $(BCONFIG_H) $(SYSTEM_H) coretypes.h \ $(GENSUPPORT_H) build/rtl.o: rtl.c $(BCONFIG_H) coretypes.h $(GTM_H) $(SYSTEM_H) \ $(RTL_H) $(GGC_H) errors.h +build/tree.o: tree.c $(BCONFIG_H) coretypes.h $(GTM_H) $(SYSTEM_H) \ + $(RTL_H) $(GGC_H) errors.h build/vec.o : vec.c $(BCONFIG_H) $(SYSTEM_H) coretypes.h $(VEC_H) \ $(GGC_H) toplev.h $(DIAGNOSTIC_CORE_H) build/hash-table.o : hash-table.c $(BCONFIG_H) $(SYSTEM_H) coretypes.h \ @@ -2655,6 +2664,9 @@ build/gentarget-def.o : gentarget-def.c $(BCONFIG_H) $(SYSTEM_H) \ coretypes.h $(GTM_H) $(RTL_BASE_H) errors.h $(READ_MD_H) $(GENSUPPORT_H) \ $(HASH_TABLE_H) target-insns.def build/gengenrtl.o : gengenrtl.c $(BCONFIG_H) $(SYSTEM_H) rtl.def +build/genvect-inst-tiles.o : genvect-inst-tiles.c $(RTL_BASE_H) $(BCONFIG_H) \ + $(SYSTEM_H) coretypes.h $(GTM_H) errors.h tree-vect-unified.h \ + tree-vect-unified-opts.o tree-vect-unified-common.o # The gengtype generator program is special: Two versions are built. # One is for the build machine, and one is for the host to allow @@ -2732,8 +2744,11 @@ $(genprogmd:%=build/gen%$(build_exeext)): $(BUILD_MD) genprogerr = $(genprogmd) genrtl modes gtype hooks cfn-macros $(genprogerr:%=build/gen%$(build_exeext)): $(BUILD_ERRORS) +genprogunited = vect-inst-tiles +$(genprogunited:%=build/gen%$(build_exeext)): $(BUILD_UNITED) + # Remaining build programs. 
-genprog = $(genprogerr) check checksum condmd match +genprog = $(genprogerr) $(genprogunited) check checksum condmd match # These programs need libs over and above what they get from the above list. build/genautomata$(build_exeext) : BUILD_LIBS += -lm diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h index 68b8b30c8c77..0e981f4bc35a 100644 --- a/gcc/config/mips/mips.h +++ b/gcc/config/mips/mips.h @@ -3468,4 +3468,37 @@ struct GTY(()) machine_function { (TARGET_LOAD_STORE_PAIRS && (TUNE_P5600 || TUNE_I6400) \ && !TARGET_MICROMIPS && !TARGET_FIX_24K) +#define TARGET_VEC_PERM_CONST_ORDER \ +{ \ + {2, 2, 2, "DI", (int[2]){0,2}, 1, "PCKEV.D", "RRR", NULL, NULL}, \ + {2, 2, 2, "DI", (int[2]){1,3}, 1, "PCKOD.D", "RRR", NULL, NULL}, \ +\ + {2, 4, 4, "SI", (int[4]){0,4,2,6}, 1, "ILVEV.W", "RRR", NULL, NULL}, \ + {2, 4, 4, "SI", (int[4]){1,5,3,7}, 1, "ILVOD.W", "RRR", NULL, NULL}, \ + {2, 4, 4, "SI", (int[4]){0,2,4,6}, 1, "PCKEV.W", "RRR", NULL, NULL}, \ + {2, 4, 4, "SI", (int[4]){1,3,5,7}, 1, "PCKOD.W", "RRR", NULL, NULL}, \ + {2, 4, 4, "SI", (int[4]){2,6,3,7}, 1, "ILVL.W", "RRR", NULL, NULL}, \ + {2, 4, 4, "SI", (int[4]){0,4,1,5}, 1, "ILVR.W", "RRR", NULL, NULL}, \ +\ + {2, 8, 8, "HI", (int[8]){0,8,2,10,4,12,6,14}, 1, "ILVEV.H", "RRR", NULL, NULL}, \ + {2, 8, 8, "HI", (int[8]){1,9,3,11,5,13,7,15}, 1, "ILVOD.H", "RRR", NULL, NULL}, \ + {2, 8, 8, "HI", (int[8]){0,2,4,6,8,10,12,14}, 1, "PCKEV.H", "RRR", NULL, NULL}, \ + {2, 8, 8, "HI", (int[8]){1,3,5,7,9,11,13,15}, 1, "PCKOD.H", "RRR", NULL, NULL}, \ + {2, 8, 8, "HI", (int[8]){0,8,1,9,2,10,3,11}, 1, "ILVR.H", "RRR", NULL, NULL}, \ + {2, 8, 8, "HI", (int[8]){4,12,5,13,6,14,7,15}, 1, "ILVL.H", "RRR", NULL, NULL}, \ +\ + /*{2, 16, 16, "QI", (int[16]){0,16,2,18,4,20,6,22,8,24,10,26,12,28,14,30}, 1, \ + "ILVEV.Q", "RRR", NULL, NULL}, \ + {2, 16, 16, "QI", (int[16]){1,17,3,19,5,21,7,23,9,25,11,27,13,29,15,31}, 1, \ + "ILVOD.Q", "RRR", NULL, NULL}, \ + {2, 16, 16, "QI", 
(int[16]){0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30}, 1, \ + "PCKEV.Q", "RRR", NULL, NULL}, \ + {2, 16, 16, "QI", (int[16]){1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31}, 1, \ + "PCKOD.Q", "RRR", NULL, NULL}, \ + {2, 16, 16, "QI", (int[16]){8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31}, 1, \ + "ILVL.Q", "RRR", NULL, NULL}, \ + {2, 16, 16, "QI", (int[16]){0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23}, 1, \ + "ILVR.Q", "RRR", NULL, NULL}, */\ +} + #define MAX_VECTOR_SIZE 16 diff --git a/gcc/coretypes.h b/gcc/coretypes.h index 8eb33ccf1231..4f714c50b58d 100644 --- a/gcc/coretypes.h +++ b/gcc/coretypes.h @@ -358,6 +358,8 @@ typedef void (*gt_pointer_operator) (void *, void *); typedef unsigned char uchar; #endif +struct vec_perm_order_spec; + /* Most host source files will require the following headers. */ #if !defined (GENERATOR_FILE) && !defined (USED_FOR_TARGET) #include "machmode.h" diff --git a/gcc/genvect-inst-tiles.c b/gcc/genvect-inst-tiles.c new file mode 100644 index 000000000000..f887af2f5bd2 --- /dev/null +++ b/gcc/genvect-inst-tiles.c @@ -0,0 +1,2043 @@ +/* Loop Vectorization using unified representation for permute instructions. + Copyright (C) 2003-2015 Free Software Foundation, Inc. + Contributed by Sameera Deshpande + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#define GENERATOR_FILE 1 +#include "bconfig.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "errors.h" +#ifdef GENERATOR_FILE +#include "machmode.h" +#include "signop.h" +#include "wide-int.h" +#include "double-int.h" +#include "real.h" +#include "fixed-value.h" +#include "statistics.h" +#include "vec.h" +#include "hash-table.h" +#include "hash-set.h" +#include "input.h" +#include "is-a.h" +#include "target.h" +#endif + +#include "tree-core.h" +#include "tree-vect-unified.h" +//#include "tree-vect-unified-common.c" +//#include "tree-vect-unified-opts.c" + +#define DEBUG 0 +int target_flags; + +enum rule_type {NT2T, NT2NT, NT2OP}; + +/* Normalized context free grammar of the form + NT --> T + NT--> NT + NT --> OP () */ +struct grammar_rule +{ + /* Pointer to vec_perm_order_spec corresponding to grammar rule. For default + rules, this value is NULL. */ + struct vec_perm_order_spec *porder; + + /* Non-terminal on LHS. */ + int lhs_nt; + + enum rule_type type; + + int spec_idx; + + int cost; + + union { + /* Terminal on RHS. */ + int terminal; + + /* Non-terminal on RHS. */ + int non_terminal; + + /* RHS of the form OP_div,sel (NT1, NT2...NTk) for k_arity operation op. */ + struct rhs_expression { + struct operation { + enum primop_code op; + int opd_selector; + int division; + int out_type; + tree *var_stride; + } primop; + + vec rhs_nt; + } rhs_exp; + } u; +}; + +struct vec_perm_order_spec target_spec[] = TARGET_VEC_PERM_CONST_ORDER; +vec rules; + +int default_extr_2_0, default_extr_2_1, default_ilv_2; + +int name_idx = 0; + +struct non_terminal +{ + char *str; + vec nt_on_lhs_rules; + vec nt_on_rhs_rules; + int state; + int type; +}; + +struct terminal +{ + char *str; + int state; + int type; +}; + +/* List of non-terminals used in grammar. The index is used in the grammar rule + to point to appropriate non-terminal in the list. For now, the non-terminal + is just list of strings with NT names. 
However if needed, it can be updated + to hold additional information in the structure. */ +vec non_terminals; + +/* List of terminals in Grammar. Currently, we support only 3 categories in + terminals - + MEM, REG and CONST. */ +vecterminals; + +struct transition_state +{ + int id; + vec nt; + vec rule; + vec cost; +}; + +vec states; +vec arity_list; + +struct operator_info +{ + /* Permute operation. */ + enum primop_code pcode; + + /* Arity of operator. */ + int arity; + + /* Actual arity of operator. */ + int act_arity; + + /* Selector of the operator. */ + int sel; + + /* Type of operator. */ + int type; + + /* Projection Map for ith operand of operator w.r.t. state. */ + vec map[10]; + vec index_map; + vectrans_map; + vec state_map; + vec rep_states[10]; +}; + +/* Function create_placeholder. + +*/ + +struct primop_tree * +create_placeholder (int idx, char ch, struct primop_tree *parent) +{ + struct primop_tree *ptree; + + ptree = populate_prim_node (POP_PH, NULL, + parent, NULL, NULL); + PT_PH_IDX (ptree) = idx; + PT_PH_TYPE (ptree) = ch; + return ptree; +} + +/* Function create_perm_order_tree. + + For each element in TARGET_VEC_PERM_CONST_ORDER + Do + 1. Create ILV node with arity out_vec_size. + 2. For ith element in perm_order + Do + 1. Create EXTR node with parts = in_vec_size and selector = i % parts + 2. Create child of EXTR as PLACEHOLDER__, i / parts + should not exceed num_opd. For k_arity_promotion_reduction and + unity_redundancy_elimination, PLACEHOLDER__ is used for + matching. Whereas for grammar definition, only PLACEHOLDER_ + is used for generating rules. 
+ Done + Done +*/ + +struct primop_tree * +create_perm_order_tree (struct vec_perm_order_spec spec) +{ + int i, num; + struct primop_tree *ilv_node, *expr_node, *placeholder; + + ilv_node = create_primTree_combine (POP_ILV, NULL, + spec.out_vec_size, NULL, NULL, NULL); + + for (i = 0; i < spec.out_vec_size; i++) + { + expr_node = create_primTree_partition (POP_EXTR, NULL, + spec.in_vec_size, spec.perm_order[i] % spec.in_vec_size, + NULL, ilv_node, NULL); + num = spec.perm_order[i] / spec.in_vec_size; + placeholder = create_placeholder (num, + spec.opd_constraint[num], expr_node); + add_child_at_index (expr_node, placeholder, 0); + add_child_at_index (ilv_node, expr_node, i); + } + + return ilv_node; +} + +/* Function print_perm_order. + +*/ + +void print_perm_order (int *perm_order, int num) +{ + int i; + + for (i = 0; i < num; i++) + printf (" %d ", perm_order[i]); +} + +/* Function print_instruction_tile. + +*/ + +void +print_instruction_tile (struct primop_tree *ptree, int tab = 0) +{ + int i; + + if (PT_NODE_OP (ptree) != POP_PH) + { + printf ("\n"); + for (i = 0; i < tab; i++) + printf (" "); + } + switch (PT_NODE_OP (ptree)) + { + case POP_EXTR: + printf ("EXTR_%d,%d (", PT_DIVISION (ptree), + PT_OPERAND_SELECTOR (ptree)); + print_instruction_tile (PT_CHILD (ptree, 0), tab + 2); + printf (")"); + break; + case POP_ILV: + printf ("ILV_%d (", PT_DIVISION (ptree)); + for (i = 0; i < PT_DIVISION (ptree) - 1; i++) + { + print_instruction_tile (PT_CHILD (ptree, i), tab + 2); + printf (" , "); + } + print_instruction_tile (PT_CHILD (ptree, i), tab + 2); + printf (")"); + break; + case POP_PH: + printf ("PH%c:%d", PT_PH_TYPE (ptree), PT_PH_IDX (ptree)); + break; + default: + gcc_assert (!"\nUndesired case in printing tree.\n"); + return; + } +} + +/* Function print_instruction_tiles. 
+ +*/ + +void +print_instruction_tiles () +{ + int i; + printf ("/*"); + for (i = 0; i < sizeof (target_spec)/sizeof (struct vec_perm_order_spec); i++) + { + printf ("\n\npermute order - "); + print_perm_order (target_spec[i].perm_order, target_spec[i].out_vec_size); + print_instruction_tile (target_spec[i].ptree); + } + printf ("*/\n\n"); +} + +/* Function create_instruction_tiles. + + For each permute_order in TARGET_VEC_PERM_CONST_ORDER + Do + 1. Create permute order tree from permute order - the permute order tree + so created is of arity out_vec_size. + 2. Perform k_arity_promotion_reduction on permute order tree to reduce the + arity to 2. As out_vec_size is power of 2, the promotion/reduction is + never going to fail. + 3. Perform unity_redundancy_elimination of kind + ILV_m (EXTR_0(S), EXTR_1(S),...EXTR_m-1(S)) => S + EXTR_m,x (ILV_M(S1, S2, ... Sm)) => Sx + to get optimal permute order tree. + Done +*/ + +void +create_instruction_tiles () +{ + int i; + struct primop_tree *ptree; + arity_list = vNULL; + arity_list.safe_insert (0, 2); + + for (i = 0; i < sizeof (target_spec)/sizeof (struct vec_perm_order_spec); i++) + { + ptree = create_perm_order_tree (target_spec[i]); + ptree = k_arity_promotion_reduction (ptree, 2); + ptree = unity_redundancy_elimination (ptree); + target_spec[i].ptree = ptree; + } + +} + +/* Function get_term_index. + + Return index of terminal. +*/ + +int +get_term_index (vec *worklist, char *str, int type) +{ + int i; + for (i = 0; i < worklist->length (); i++) + { + if (type == (*worklist)[i]->type && !strcmp ((*worklist)[i]->str, str)) + return i; + } + return -1; +} + + +/* Function get_index. + + Return index of non-terminal. +*/ + +int +get_index (vec *worklist, char *str, int type) +{ + int i; + for (i = 0; i < worklist->length (); i++) + { + if (type == (*worklist)[i]->type && !strcmp ((*worklist)[i]->str, str)) + return i; + } + return -1; +} + +/* Function create_non_terminal. 
+ +*/ + +int +create_non_terminal (char *str, int type) +{ + int idx; + struct non_terminal *buf; + + idx = get_index (&non_terminals, str, type); + if (idx != -1) + return idx; + + idx = non_terminals.length (); + buf = (struct non_terminal *) xcalloc (1, sizeof (struct non_terminal)); + buf->str = (char *) xcalloc (strlen (str), sizeof (char)); + strcpy (buf->str, str); + buf->nt_on_lhs_rules = vNULL; + buf->nt_on_rhs_rules = vNULL; + buf->state = -1; + non_terminals.safe_insert (idx, buf); + non_terminals[idx]->type = type; + return idx; +} + +/* Function create_rule_NT_to_NT. + + Creates grammar rule of kind NT --> NT for normalized grammar. +*/ + +int +create_rule_NT_to_NT (int lhs_nt, int rhs_nt, int off, int cost) +{ + struct grammar_rule * rule; + int ruleno; + + gcc_assert (non_terminals[lhs_nt]->type == non_terminals[rhs_nt]->type); + rule = (struct grammar_rule *) xcalloc (1, sizeof (struct grammar_rule)); + rule->lhs_nt = lhs_nt; + rule->type = NT2NT; + rule->u.non_terminal = rhs_nt; + rule->spec_idx = off; + if (off != -1) + rule->cost = target_spec[off].cost; + else + rule->cost = cost; + ruleno = rules.length (); + rules.safe_insert (ruleno, rule); + non_terminals[lhs_nt]->nt_on_lhs_rules.safe_insert ( + non_terminals[lhs_nt]->nt_on_lhs_rules.length (), ruleno); + + non_terminals[rhs_nt]->nt_on_rhs_rules.safe_insert ( + non_terminals[rhs_nt]->nt_on_rhs_rules.length (), ruleno); + + return ruleno; +} + +int lookup_NT2T_in_grammar (vec *, char *, int); +/* Function create_rule_NT_to_T. + + Creates grammar rule of kind NT --> T for normalized grammar. 
+*/ + +int +create_rule_NT_to_T (int lhs_nt, int rhs_t, int off, int cost = 8) +{ + struct grammar_rule * rule; + int ruleno; + + ruleno = lookup_NT2T_in_grammar (&rules, non_terminals[lhs_nt]->str, rhs_t); + if (ruleno != -1) + return ruleno; + + gcc_assert (non_terminals[lhs_nt]->type == terminals[rhs_t]->type); + rule = (struct grammar_rule *) xcalloc (1, sizeof (struct grammar_rule)); + rule->lhs_nt = lhs_nt; + rule->type = NT2T; + rule->u.terminal = rhs_t; + rule->spec_idx = off; + if (off != -1) + rule->cost = target_spec[off].cost; + else + rule->cost = cost; + + ruleno = rules.length (); + rules.safe_insert (ruleno, rule); + + non_terminals[lhs_nt]->nt_on_lhs_rules.safe_insert ( + non_terminals[lhs_nt]->nt_on_lhs_rules.length (), ruleno); + return ruleno; +} + +/* Function create_rule_NT_to_op_tree. + + Creates grammar rule of kind NT --> OP (NT1, NT2 ...) for normalized grammar. +*/ + +int +create_rule_NT_to_op_tree (int lhs_nt, enum primop_code op, int selector, + int division, int *rhs_opd, int length, int off, + int otype, int cost=-1) +{ + struct grammar_rule * rule; + int ruleno; + int i; + + rule = (struct grammar_rule *) xcalloc (1, sizeof (struct grammar_rule)); + rule->lhs_nt = lhs_nt; + rule->type = NT2OP; + rule->u.rhs_exp.primop.op = op; + rule->u.rhs_exp.primop.opd_selector = selector; + rule->u.rhs_exp.primop.division = division; + rule->u.rhs_exp.primop.out_type = otype; + rule->u.rhs_exp.primop.var_stride = NULL; + rule->spec_idx = off; + if (off != -1) + rule->cost = target_spec[off].cost; + else if (cost != -1) + rule->cost = cost; + else + rule->cost = (op == POP_EXTR ? 
8 : 16);//rules[default_ilv_2]->cost); + + for (i = 0; i < length; i++) + { + rule->u.rhs_exp.rhs_nt.safe_insert (i, rhs_opd[i]); + } + + ruleno = rules.length (); + rules.safe_insert (ruleno, rule); + + non_terminals[lhs_nt]->nt_on_lhs_rules.safe_insert ( + non_terminals[lhs_nt]->nt_on_lhs_rules.length (), ruleno); + + return ruleno; +} + +int +create_terminal (char *str, int type) +{ + struct terminal *buf; + int idx; + + idx = get_term_index (&terminals, str, type); + if (idx != -1) + return idx; + buf = (struct terminal *) xcalloc (1, sizeof (struct terminal)); + buf->str = (char *) xcalloc (strlen(str)+1, sizeof (char)); + strcpy (buf->str, str); + buf->state = -1; + idx = terminals.length (); + terminals.safe_insert (idx, buf); + terminals[idx]->type = type; + + return idx; +} +vec type_name = vNULL; + +int +lookup_type (vec *type_name, char *type) +{ + int i; + for (i = 0; i < type_name->length (); i++) + { + if (!strcmp (type, (*type_name)[i])) + return i; + } + char *str = (char *) xcalloc (1, strlen (type)); + strcpy (str, type); + type_name->safe_insert (i, str); + return i; +} +/* Function create_terminals. 
+ +*/ + +void +create_terminals () +{ + struct terminal *buf; + int idx, mem, consti; + int vec_size, vector_sizes, max_vec_size, i; + int type; + + vector_sizes = MAX_VECTOR_SIZE; + max_vec_size = 1 << floor_log2 (vector_sizes); + vec_size = max_vec_size; + +// for (i = 0; i < floor_log2 (max_vec_size); i++) + for (i = 0; i < sizeof (target_spec)/sizeof (struct vec_perm_order_spec); i++) + { + char buf[20]; + int idx; + + type = lookup_type (&type_name, target_spec[i].type); + sprintf (buf, "MEM_%s", target_spec[i].type); + mem = create_terminal (buf, type); + sprintf (buf, "mem_%s", target_spec[i].type); + create_rule_NT_to_T (create_non_terminal (buf, type), mem, -1); + + sprintf (buf, "CONST_%s", target_spec[i].type); + consti = create_terminal (buf, type); + sprintf (buf, "const_%s", target_spec[i].type); + create_rule_NT_to_T (create_non_terminal (buf, type), consti, -1); + + + sprintf (buf, "REG_%s", target_spec[i].type); + idx = create_terminal (buf, type); + sprintf (buf, "reg_%s", target_spec[i].type); + create_rule_NT_to_T (create_non_terminal (buf, type), idx, -1); + create_rule_NT_to_T (create_non_terminal (buf, type), mem, -1); + create_rule_NT_to_T (create_non_terminal (buf, type), consti, -1); + } + + return; +} + +/* Function create_default_rules. 
+ + Default rules Costs + ================================ + goal --> reg (0) + goal --> mem (0) + reg --> mem (10) + reg --> const (5) + mem --> reg (10) + mem --> const (8) + : + : + reg_32 --> REG (0) + reg_16 --> REG (0) + reg_8 --> REG (0) + reg_4 --> REG (0) + reg_2 --> REG (0) + reg -> reg_2 (0) + reg -> reg_4 (0) + reg -> reg_8 (0) + reg -> reg_16 (0) + reg -> reg_32 (0) + : + : + mem --> MEM (0) + const --> CONST (0) + reg --> EXTR_2,0 (reg) (1) + reg --> EXTR_2,1 (reg) (1) + reg --> ILV (reg, reg) (2) +*/ + +void +create_default_rules () +{ + int goal, reg, mem, consti; + int i, vec_size, max_vec_size, vector_sizes; + int v[2]; + + create_terminals (); + return; +} + +/* Function lookup_NT2T_in_grammar. + + Look-up similar rule in rule-list. +*/ + +int +lookup_NT2T_in_grammar (vec *rule, + char *nt_substr, int t_idx) +{ + int i; + + if (*rule == vNULL) + return -1; + + for (i = 0; i < rule->length (); i++) + { + struct grammar_rule *r; + rule->iterate (i, &r); + + if (r->u.terminal == t_idx + && (strstr (non_terminals[r->lhs_nt]->str, nt_substr))) + return i; + } + return -1; +} + +/* Function lookup_NT2OP_in_grammar. + + Look-up rule matching operation OP and children in vector nt_list. +*/ + +int +lookup_NT2OP_in_grammar (vec *rule, + enum primop_code code, int sel, int div, int otype, + int *nt_list, int length) +{ + int i, j; + + if (*rule == vNULL) + return -1; + + for (i = 0; i < rule->length (); i++) + { + struct grammar_rule *r; + rule->iterate (i, &r); + + if (r->u.rhs_exp.primop.op == code + && sel == r->u.rhs_exp.primop.opd_selector + && div == r->u.rhs_exp.primop.division + && otype == r->u.rhs_exp.primop.out_type) + { + for (j = 0; j < length; j++) + { + if (nt_list[j] != r->u.rhs_exp.rhs_nt[j]) + break; + } + if (j == length) + return i; + } + } + + return -1; +} + +/* Function create_rule_for_ptree. + + Recursive function to create grammar rules in normal form. 
+ + If the leaf node with placeholder : + - check if the rule is already present. + If yes, return the previously created non-terminal + Otherwise, create new non-terminal for the place-holder, with appropriate + vector size, and create rules of the form + --> + |mem|const> --> + + If non-leaf node, + - For each child of the node invoke the function recursively. + - Once all children are processed, check if the rule with current primop + and children is already present. + If yes, return previously created non-terminal for corresponding rule. + Otherwise, create non-terminal for out_vecsize and create rules of the + form + --> PRIMOP () + |mem|const> --> +*/ + +int +create_rule_for_ptree (struct primop_tree *ptree, int spec_idx, int out_vecsize, + int in_vecsize, int target_type, int type) +{ + int chld_nt[30]; + char buf[20], name[20], buf1[20], buf2[20]; + int found, nt, new_in_vec_size, i, ruleno; + + if (PT_NODE_OP (ptree) == POP_PH) + { + sprintf (buf, "reg_%s", type_name[type]); + sprintf (buf1, "mem_%s", type_name[type]); + sprintf (buf2, "const_%s", type_name[type]); + return create_non_terminal (PT_PH_TYPE (ptree) == 'R' ? (char *) buf + : PT_PH_TYPE (ptree) == 'M' ? (char *) buf1 + : (char *)buf2, type); + } + + if (PT_NODE_OP (ptree) == POP_ILV) + new_in_vec_size = in_vecsize / PT_DIVISION (ptree); + if (PT_NODE_OP (ptree) == POP_EXTR) + new_in_vec_size = in_vecsize * PT_DIVISION (ptree); + + for (i = 0; i < ptree->children.length (); i++) + { + chld_nt[i] = create_rule_for_ptree (PT_CHILD (ptree, i), + -1, in_vecsize, new_in_vec_size, target_type, type); + } + + + found = lookup_NT2OP_in_grammar (&rules, + (enum primop_code) PT_NODE_OP (ptree), + PT_OPERAND_SELECTOR (ptree), + PT_DIVISION (ptree), type, chld_nt, i); + + if (found != -1) + { + return (rules)[found]->lhs_nt; + } + else + { + /* Create new NT, and create rule NT_to_OP. 
*/ + sprintf (name, "inter%d", name_idx++); + nt = create_non_terminal (name, type); + ruleno = create_rule_NT_to_op_tree (nt, + (enum primop_code) PT_NODE_OP (ptree), + PT_OPERAND_SELECTOR (ptree), PT_DIVISION (ptree), + chld_nt, i, spec_idx, type); + + if (spec_idx == -1) + { + //ruleno = create_rule_NT_to_NT (create_non_terminal ("reg"), nt, -1, 0); + } + + if (spec_idx != -1) + { + sprintf (buf, "reg_%s", type_name[type]); + sprintf (buf1, "mem_%s", type_name[type]); + sprintf (buf2, "const_%s", type_name[type]); + + ruleno = create_rule_NT_to_NT ( + target_type == 'R' ? + create_non_terminal (buf, type) : + target_type == 'M' ? + create_non_terminal (buf1, type) : + create_non_terminal (buf2, type), nt, -1, 0); + } + return nt; + } +} + +void +create_default_op_rules () +{ + int goal, reg, mem, consti; + int i, j, vec_size, max_vec_size, vector_sizes; + int v[2]; + int chld_nt[4]; + char name[20]; + int found, nt, ruleno; + + + /* For each vector type supported, add NT2T rule for + reg. */ + vector_sizes = MAX_VECTOR_SIZE; + max_vec_size = 1 << floor_log2 (vector_sizes); + vec_size = max_vec_size >> 1; + + for (i = 0; i < type_name.length (); i++) + { + char buf[20]; + int idx; + + sprintf (buf, "reg_%s", type_name[i]); + idx = create_non_terminal (buf, i); + + for (j = 0; j < 2; j++) + { + chld_nt[j] = idx; + } + + + found = lookup_NT2OP_in_grammar (&rules, POP_ILV, -1, + 2, i, chld_nt, 2); + + if (found == -1) + { + /* Create new NT, and create rule NT_to_OP. */ + sprintf (name, "inter%d", name_idx++); + nt = create_non_terminal (name, i); + ruleno = create_rule_NT_to_op_tree (nt, POP_ILV, -1, 2, + chld_nt, 2, -1, i); + + ruleno = create_rule_NT_to_NT (idx, nt, -1, 0); + } + else + { + ruleno = create_rule_NT_to_NT (idx, rules[found]->lhs_nt, -1, 0); + } + + found = lookup_NT2OP_in_grammar (&rules, POP_EXTR, 0, + 2, i, chld_nt, 1); + + if (found == -1) + { + /* Create new NT, and create rule NT_to_OP. 
*/ + sprintf (name, "inter%d", name_idx++); + nt = create_non_terminal (name, i); + ruleno = create_rule_NT_to_op_tree (nt, POP_EXTR, 0, 2, + chld_nt, 1, -1, i); + + ruleno = create_rule_NT_to_NT (idx, nt, -1, 0); + } + else + { + ruleno = create_rule_NT_to_NT (idx, rules[found]->lhs_nt, -1, 0); + } + + found = lookup_NT2OP_in_grammar (&rules, POP_EXTR, 1, + 2, i, chld_nt, 1); + + if (found == -1) + { + /* Create new NT, and create rule NT_to_OP. */ + sprintf (name, "inter%d", name_idx++); + nt = create_non_terminal (name, i); + ruleno = create_rule_NT_to_op_tree (nt, POP_EXTR, 1, 2, + chld_nt, 1, -1, i); + + ruleno = create_rule_NT_to_NT (idx, nt, -1, 0); + } + else + { + ruleno = create_rule_NT_to_NT (idx, rules[found]->lhs_nt, -1, 0); + } + + } + return; + +} + +/* Function create_grammar_rules. + + Creates grammar rules for each primop_tree. +*/ + +void +create_grammar_rules () +{ + int i; + + rules = vNULL; + non_terminals = vNULL; + terminals = vNULL; + create_default_rules (); + for (i = 0; + i < sizeof (target_spec)/sizeof (struct vec_perm_order_spec); + i++) + { + int idx = 0; + + create_rule_for_ptree (target_spec[i].ptree, i, + target_spec[i].out_vec_size, + target_spec[i].in_vec_size, + target_spec[i].opd_constraint[0], + lookup_type(&type_name, (target_spec[i].type))); + } + create_default_op_rules (); +} + +/* Function print_rule_operands. + +*/ + +void +print_rule_operands (vec *arr) +{ + int i; + printf ("%s", non_terminals[(*arr)[0]]->str); + for (i = 1; i < arr->length (); i++) + { + printf (", %s", non_terminals[(*arr)[i]]->str); + } +} + +/* Function print_grammar_rule. 
+ +*/ + +void +print_grammar_rule (struct grammar_rule *rule) +{ + switch (rule->type) + { + case NT2T: + printf ("%s --> %s", non_terminals[rule->lhs_nt]->str, + terminals[rule->u.terminal]->str); + break; + + case NT2NT: + printf ("%s --> %s", non_terminals[rule->lhs_nt]->str, + non_terminals[rule->u.non_terminal]->str); + + break; + + case NT2OP: + printf ("%s --> ", non_terminals[rule->lhs_nt]->str); + switch (rule->u.rhs_exp.primop.op) + { + case POP_ILV: + printf ("ILV_%d_%s (", rule->u.rhs_exp.primop.division, + type_name[rule->u.rhs_exp.primop.out_type]); + break; + case POP_EXTR: + printf ("EXTR_%d,%d_%s (", rule->u.rhs_exp.primop.division, + rule->u.rhs_exp.primop.opd_selector, + type_name[rule->u.rhs_exp.primop.out_type]); + break; + default: + gcc_assert (0); + } + print_rule_operands (&rule->u.rhs_exp.rhs_nt); + printf (")"); + + break; + + default: + gcc_assert (0); + } + + if (rule->spec_idx != -1) + printf (" : [%d::%d]", rule->spec_idx, rule->cost); +} + +void +print_grammar_rules_in_comment () +{ + int i,j; + +printf ("/*\n"); + for (i = 0; i < rules.length (); i++) + { + printf ("\n%d:\t", i); + print_grammar_rule (rules[i]); + } + printf ("*/\n\n"); +} + + +void +print_grammar_rules () +{ + int i,j; + + printf ("enum rule_type {NT2T, NT2NT, NT2OP};\n\n"); + printf ("struct grammar_rule\n{\n"); + printf (" int lhs;\n enum rule_type type;\n int spec_idx;\n"); + printf (" union\n {\n"); + printf (" int terminal;\n int non_terminal;\n"); + printf (" struct\n {\n"); + printf (" enum primop_code op;\n int selector;\n"); + printf (" int division;\n int out_type;\n"); + printf (" vec opd;\n"); + printf (" } rhs_exp;\n"); + printf (" } u;\n"); + printf ("};\n\n"); + printf ("vec rules = vNULL;\n\n"); + + printf ("void\ninit_grammar_rules ()\n{\n"); + printf (" struct grammar_rule *rule;\n\n"); + + for (i = 0; i < rules.length (); i++) + { + printf (" rule = (struct grammar_rule *)"); + printf (" xcalloc (1, sizeof (struct grammar_rule));\n"); + printf (" 
/* %d: ", i); + print_grammar_rule (rules[i]); + printf (" */\n"); + switch (rules[i]->type) + { + case NT2T: + printf (" rule->type = NT2T;\n"); + printf (" rule->lhs = UNIF_VECT_NT_%s;\n", + non_terminals[rules[i]->lhs_nt]->str); + printf (" rule->u.terminal = UNIF_VECT_T_%s;\n", + terminals[rules[i]->u.terminal]->str); + break; + case NT2NT: + printf (" rule->type = NT2NT;\n"); + printf (" rule->lhs = UNIF_VECT_NT_%s;\n", + non_terminals[rules[i]->lhs_nt]->str); + printf (" rule->u.non_terminal = UNIF_VECT_NT_%s;\n", + non_terminals[rules[i]->u.non_terminal]->str); + break; + case NT2OP: + printf (" rule->type = NT2OP;\n"); + printf (" rule->lhs = UNIF_VECT_NT_%s;\n", + non_terminals[rules[i]->lhs_nt]->str); + printf (" rule->u.rhs_exp.op = POP_%s;\n", + tree_code_name[rules[i]->u.rhs_exp.primop.op]); + printf (" rule->u.rhs_exp.selector = %d;\n", + rules[i]->u.rhs_exp.primop.opd_selector); + printf (" rule->u.rhs_exp.division = %d;\n", + rules[i]->u.rhs_exp.primop.division); + printf (" rule->u.rhs_exp.out_type = %d;\n", + rules[i]->u.rhs_exp.primop.out_type); + printf (" rule->u.rhs_exp.opd = vNULL;\n"); + for (j = 0; j < rules[i]->u.rhs_exp.rhs_nt.length (); j++) + { + printf (" rule->u.rhs_exp.opd.safe_insert ("); + printf (" rule->u.rhs_exp.opd.length (), UNIF_VECT_NT_%s);\n", + non_terminals[rules[i]->u.rhs_exp.rhs_nt[j]]->str); + } + break; + default: + gcc_assert (!"Unknown rule."); + } + printf (" rule->spec_idx = %d;\n", rules[i]->spec_idx); + printf (" rules.safe_insert (rules.length (), rule);\n\n"); + } + printf ("}\n\n"); +} + +void +normalize_costs (struct transition_state *state) +{ + int i, delta = 0xfffffff; + + for (i = 0; i < state->rule.length (); i++) + { + if (state->rule[i] != -1 && delta > state->cost[i]) + delta = state->cost[i]; + + if (delta == 0) + break; + } + + if (delta == 0) + return; + + for (i = 0; i < state->rule.length (); i++) + { + if (state->rule[i] != -1) + state->cost[i] = state->cost[i] - delta; + } +} + +void +closure 
(struct transition_state *state) +{ + bool changed; + int i, cost; + + do { + changed = false; + for (i = 0; i < rules.length (); i++) + { + if (rules[i]->type == NT2NT + && state->rule[rules[i]->u.non_terminal] != -1) + { + cost = ((rules[i]->cost == -1) ? 0 + : rules[i]->cost) + + state->cost[rules[i]->u.non_terminal]; + + if (state->rule[rules[i]->lhs_nt] == -1 + || cost < state->cost[rules[i]->lhs_nt]) + { + state->rule[rules[i]->lhs_nt] = i; + state->cost[rules[i]->lhs_nt] = cost; + changed = true; + } + } + } + } while (changed == true); +} + +void +compute_leaf_states (vec *worklist) +{ + int i, j, idx; + + for (i = 0; i < terminals.length (); i++) + { + struct transition_state *state = (struct transition_state *) + xcalloc (1, sizeof (struct transition_state)); + + state->rule.reserve_exact (non_terminals.length ()); + state->cost.reserve_exact (non_terminals.length ()); + state->nt.reserve_exact (non_terminals.length ()); + for (j = 0; j < non_terminals.length (); j++) + { + state->rule.safe_insert (j, -1); + state->cost.safe_insert (j, -1); + state->nt.safe_insert (j, j); + } + + for (j = 0; j < rules.length (); j++) + { + if (rules[j]->type == NT2T + && rules[j]->u.terminal == i) + { + if (rules[j]->spec_idx == -1) + { + state->rule[rules[j]->lhs_nt] = j; + state->cost[rules[j]->lhs_nt] = 0; + } + else if (state->rule[rules[j]->lhs_nt] == -1 + || rules[j]->cost + < state->cost[rules[j]->lhs_nt]) + { + state->rule[rules[j]->lhs_nt] = j; + state->cost[rules[j]->lhs_nt] + = rules[j]->cost; + } + } + } + normalize_costs (state); + closure (state); + + worklist->safe_push (state); + idx = states.length (); + state->id = idx; + states.safe_insert (idx, state); + terminals[i]->state = idx; + } +} + +struct transition_state * +project (enum primop_code pcode, int div, int sel, int type, int idx, + struct transition_state *state) +{ + vec ntlist = vNULL; + vec costlist = vNULL; + vec rulelist = vNULL; + int j; + struct transition_state *new_state = (struct 
transition_state *) + xcalloc (1, sizeof (struct transition_state)); + + for (j = 0; j < rules.length (); j++) + { + if (rules[j]->type != NT2OP) + continue; + + if (rules[j]->u.rhs_exp.primop.op != pcode + || rules[j]->u.rhs_exp.primop.division != div + || non_terminals[rules[j]->lhs_nt]->type != type + || rules[j]->u.rhs_exp.primop.out_type != type) + continue; + + if (rules[j]->u.rhs_exp.primop.op == POP_EXTR + && rules[j]->u.rhs_exp.primop.opd_selector != sel) + continue; + +// if (state->cost[rules[j]->u.rhs_exp.rhs_nt[idx]] == -1) +// continue; + + ntlist.safe_insert (ntlist.length (), rules[j]->u.rhs_exp.rhs_nt[idx]); + costlist.safe_insert (costlist.length (), + state->cost[rules[j]->u.rhs_exp.rhs_nt[idx]]); + rulelist.safe_insert (rulelist.length (), j); + } + + new_state->rule = rulelist.copy (); + new_state->cost = costlist.copy (); + new_state->nt = ntlist.copy (); + new_state->id = -1; + + normalize_costs (new_state); + + return new_state; +} +bool +increment_next (int idx, int arity, vec *opd_list, + vec rep_states[], + int pstate_idx, int pstate_loc) +{ + int i; + + if (idx == pstate_loc) + { + (*opd_list)[idx] = pstate_loc; + idx++; + } + + if (idx >= arity - 1) + return false; + + (*opd_list)[idx] = 0; + + if ((*opd_list)[idx + 1] < rep_states[idx + 1].length () - 1) + { + (*opd_list)[idx + 1]++; + return true; + } + + return increment_next (idx + 1, arity, opd_list, rep_states, + pstate_idx, pstate_loc); +} + +bool +rep_state_combination_next (struct transition_state *proj_state, + vec rep_states[], + int act_arity, int idx, int rep_loc, + vec *opd_list) +{ + int i, j; + + for (i = 0; i < act_arity; i++) + if (rep_states[i] == vNULL) + return false; + + + if (*opd_list == vNULL) + { + for (i = 0; i < act_arity; i++) + if (i != idx) + opd_list->safe_insert (i, 0); + else + opd_list->safe_insert (i, rep_loc); + return true; + } + + for (i = 0; i < act_arity; i++) + { + if (i == idx) + { + (*opd_list)[i] = rep_loc; + continue; + } + if 
((*opd_list)[i] < rep_states[i].length () - 1) + { + (*opd_list)[i]++; + return true; + } + else + { + for (j = 0; j < i; j++) + if (i != idx) + (*opd_list)[j] = 0; + return increment_next (j, act_arity, opd_list, + rep_states, idx, rep_loc); + } + } + return false; +} + +bool +is_state_equal_p (struct transition_state *st1, struct transition_state *st2) +{ + int i; + + if (st1->nt.length () != st2->nt.length ()) + return false; + + if (st1->rule.length () != st2->rule.length ()) + return false; + + if (st1->cost.length () != st2->cost.length ()) + return false; + + for (i = 0; i < st1->nt.length (); i++) + { + + if (st1->nt[i] != st2->nt[i] + || st1->cost[i] != st2->cost[i]) + { + return false; + } + } + return true; +} + +void +trim_state_table () +{ +} + +void +compute_transitions (struct operator_info *op, + struct transition_state *state, + vec *worklist) +{ + int i, j, l; + long long k; + int cost; + struct transition_state *proj_state; + struct transition_state *result = (struct transition_state *) + xcalloc (1, sizeof (struct transition_state)); + + result->rule.reserve_exact (non_terminals.length ()); + result->cost.reserve_exact (non_terminals.length ()); + result->nt.reserve_exact (non_terminals.length ()); + result->id = -1; + for (j = 0; j < non_terminals.length (); j++) + { + result->rule.safe_insert (j, -1); + result->cost.safe_insert (j, -1); + result->nt.safe_insert (j, j); + } + + for (i = 0; i < op->act_arity; i++) + { + vec opd_list; + proj_state = project (op->pcode, op->arity, op->sel, op->type, i, state); + + for (j = 0; j < op->rep_states[i].length (); j++) + { + if (is_state_equal_p (proj_state, op->rep_states[i][j])) + break; + } + + proj_state->id = j; + if (j == op->rep_states[i].length ()) + { + op->rep_states[i].safe_insert (j, proj_state); + } + else + continue; + + if (op->map[i].length () < state->id) + op->map[i].safe_grow_cleared (state->id, NULL); + op->map[i].safe_insert (state->id, proj_state); + + opd_list = vNULL; + for (k = 
0; k < op->rep_states[i].length (); k++)
+        {
+          if (op->rep_states[i][k]->nt == vNULL)
+            break;
+        }
+
+      /* Skip this operand position if any of its representer states has an
+         empty non-terminal list.  */
+      if (k != op->rep_states[i].length ())
+        continue;
+
+      /* Enumerate operand combinations with operand I pinned to the
+         representer state recorded at index J.  J is passed as the pinned
+         location on every call, so it must keep its value across
+         iterations of this loop.  */
+      while (rep_state_combination_next (proj_state,
+                                         op->rep_states,
+                                         op->act_arity,
+                                         i,
+                                         j,
+                                         &opd_list))
+        {
+          /* R walks the grammar rules.  It is a separate index so that J
+             (the pinned representer location) is not overwritten; reusing
+             J here left it equal to rules.length () for the next
+             combination request.  */
+          for (int r = 0; r < rules.length (); r++)
+            {
+              /* Only operator rules with a known cost that match this
+                 operator's code, division, selector and type apply.  */
+              if (rules[r]->type != NT2OP)
+                continue;
+
+              if (rules[r]->cost == -1)
+                continue;
+
+              if (rules[r]->u.rhs_exp.primop.op != op->pcode)
+                continue;
+
+              if (rules[r]->u.rhs_exp.primop.division != op->arity)
+                continue;
+
+              if (rules[r]->u.rhs_exp.primop.opd_selector != op->sel)
+                continue;
+
+              if (rules[r]->u.rhs_exp.primop.out_type != op->type)
+                continue;
+
+
+              if (non_terminals[rules[r]->lhs_nt]->type != op->type)
+                continue;
+
+              /* Find the cost of deriving the rule's I-th operand
+                 non-terminal from the projected state.  */
+              for (k = 0; k < proj_state->nt.length (); k++)
+                if (proj_state->nt[k] == rules[r]->u.rhs_exp.rhs_nt[i])
+                  break;
+
+              if (k == proj_state->nt.length () || proj_state->cost[k] == -1)
+                continue;
+
+              /* Add the cost of every other operand taken from the
+                 representer state selected by OPD_LIST; -1 marks the
+                 combination as not derivable.  */
+              cost = rules[r]->cost + proj_state->cost[k];
+              for (k = 0; k < op->act_arity; k++)
+                {
+                  if (k == i)
+                    continue;
+
+                  for (l = 0;
+                       l < op->rep_states[k][opd_list[k]]->nt.length ();
+                       l++)
+                    if (op->rep_states[k][opd_list[k]]->nt[l]
+                        == rules[r]->u.rhs_exp.rhs_nt[k])
+                      break;
+
+                  if (l == op->rep_states[k][opd_list[k]]->nt.length ()
+                      || op->rep_states[k][opd_list[k]]->cost[l] == -1)
+                    {
+                      cost = -1;
+                      break;
+                    }
+                  else
+                    cost = cost
+                           + op->rep_states[k][opd_list[k]]->cost[l];
+                }
+              if (cost == -1)
+                continue;
+
+              /* RESULT already holds one slot per non-terminal, so the
+                 cheaper rule overwrites the slot in place.  safe_insert
+                 would insert a new element and shift the later slots.  */
+              if ((result->cost[rules[r]->lhs_nt] == -1
+                   || cost < result->cost[rules[r]->lhs_nt]))
+                {
+                  result->cost[rules[r]->lhs_nt] = cost;
+                  result->rule[rules[r]->lhs_nt] = r;
+                  result->nt[rules[r]->lhs_nt] = rules[r]->lhs_nt;
+                }
+            }
+          normalize_costs (result);
+          closure (result);
+
+
+          /* Reuse an existing identical state if there is one.  */
+          for (k = 0; k < states.length (); k++)
+            if (is_state_equal_p (result, states[k]))
+              break;
+
+          result->id = k;
+          /* NOTE(review): RESULT is the single object allocated at function
+             entry; once pushed into STATES it is still updated by later
+             combinations.  Confirm whether a fresh state per combination
+             was intended.  */
+          if (k == states.length ())
+            {
+              states.safe_insert (k, result);
+
+              worklist->safe_insert
(worklist->length (), result); + } + + long long int index = 0; + for (k = op->act_arity - 1; k >= 0; k--) + { + index = (index << 16) | opd_list[k]; + } + + for (k = 0; k < op->index_map.length (); k++) + if (index == op->index_map[k] /*&& is_state_equal_p (result, + states[op->trans_map[k]])*/) + break; + + if (k != op->index_map.length ()) + continue; + + k = op->index_map.length (); + op->index_map.safe_insert (k, index); + op->trans_map.safe_insert (k, result->id); + op->state_map.safe_insert (k, state->id); + } + + } +} + +vec op_list = vNULL; +struct operator_info * +create_op (enum primop_code pcode, int arity, int sel, int act_arity, int type) +{ + struct operator_info *new_op; + int i; + + new_op = (struct operator_info *) xcalloc (1, sizeof (struct operator_info)); + new_op->pcode = pcode; + new_op->arity = arity; + new_op->sel = sel; + new_op->type = type; + new_op->act_arity = act_arity; + new_op->index_map = vNULL; + new_op->trans_map = vNULL; + for (i = 0; i < act_arity; i++) + { + new_op->map[i] = vNULL; + new_op->rep_states[i] = vNULL; + } + + return new_op; +} + +void +create_transition_table () +{ + vec worklist; + int i, j, k; + struct operator_info *op; + + states = vNULL; + worklist = vNULL; + compute_leaf_states (&worklist); + for (k = 0; k < type_name.length (); k++) + for (i = 0; i < arity_list.length (); i++) + { + for (j = 0; j < arity_list[i]; j++) + { + op = create_op (POP_EXTR, arity_list[i], j, 1, k); + op_list.safe_insert (op_list.length (), op); + } + + op = create_op (POP_ILV, arity_list[i], -1, + arity_list[i], k); + op_list.safe_insert (op_list.length (), op); + } + + while (worklist.length () != 0) + { + struct transition_state *state; + state = worklist.pop (); + + for (i = 0; i < op_list.length (); i++) + { + compute_transitions (op_list[i], state, &worklist); + } + } +} + +void +print_states () +{ + int i, j; + printf ("enum unif_vect_state {\n"); + for (i = 0; i < states.length (); i++) + { + printf (" /* state_%d: \n", i); 
+ for (j = 0; j < non_terminals.length (); j++) + { + if (states[i]->rule[j] != -1) + { + printf ("\t"); + print_grammar_rule (rules[states[i]->rule[j]]); + printf ("\t\t%d : %d >> %d\n", states[i]->nt[j], states[i]->cost[j], j); + } + /*else if (states[i]->cost[j] != -1) + { + printf ("\t\t%d : %d\n", states[i]->nt[j], states[i]->cost[j]); + }*/ + } + printf (" */\n"); + printf (" UNIF_VECT_STATE_%d = %d,\n", i, i); + } + printf (" UNIF_VECT_STATE_MAX = %d};\n\n", i); + printf ("int state_nt_to_rule_map[UNIF_VECT_STATE_MAX][UNIF_VECT_NT_MAX];\n"); + + printf ("void\ninit_state_to_rule_map ()\n{\n"); + printf ("memset (state_nt_to_rule_map, -1, %d);\n", states.length () * non_terminals.length ()); + for (i = 0; i < states.length (); i++) + { + for (j = 0; j < non_terminals.length (); j++) + { + if (states[i]->rule[j] != -1) + { + printf ("state_nt_to_rule_map[UNIF_VECT_STATE_%d][UNIF_VECT_NT_%s] = %d;\n", i, non_terminals[j]->str, states[i]->rule[j]); + } + } + } + printf ("}\n\n"); +} + +void +print_rep_state () +{ + int i, j, k, l; + printf ("struct {\n"); + for (i = 0; i < op_list.length (); i++) + { + + for (j = 0; j < op_list[i]->act_arity; j++) + { + if (op_list[i]->pcode == POP_ILV) + { + printf (" vec nt_%s_%s_%d_%d", + tree_code_name[op_list[i]->pcode], type_name[op_list[i]->type], + op_list[i]->arity, j); + printf ("[%d];\n", op_list[i]->rep_states[j].length ()); + printf (" vec cost_%s_%s_%d_%d", + tree_code_name[op_list[i]->pcode], type_name[op_list[i]->type], + op_list[i]->arity, j); + printf ("[%d];\n", op_list[i]->rep_states[j].length ()); + + } + else + { + printf (" vec nt_%s_%s_%d_%d", + tree_code_name[op_list[i]->pcode], type_name[op_list[i]->type], + op_list[i]->arity, op_list[i]->sel); + printf ("[%d];\n", op_list[i]->rep_states[j].length ()); + printf (" vec cost_%s_%s_%d_%d", + tree_code_name[op_list[i]->pcode], type_name[op_list[i]->type], + op_list[i]->arity, op_list[i]->sel); + printf ("[%d];\n", op_list[i]->rep_states[j].length ()); + + } 
+ + + } + } + printf ("} rep_state;\n\n"); + printf ("void\ninit_rep_states ()\n{\n"); + for (i = 0; i < op_list.length (); i++) + { + + for (j = 0; j < op_list[i]->act_arity; j++) + { + for (k = 0; k < op_list[i]->rep_states[j].length (); k++) + { + for (l = 0; l < op_list[i]->rep_states[j][k]->nt.length (); l++) + { + if (op_list[i]->pcode == POP_ILV) + printf (" rep_state.nt_%s_%s_%d_%d", + tree_code_name[op_list[i]->pcode], type_name[op_list[i]->type], + op_list[i]->arity, j); + else + printf (" rep_state.nt_%s_%s_%d_%d", + tree_code_name[op_list[i]->pcode], type_name[op_list[i]->type], + op_list[i]->arity, op_list[i]->sel); + + printf ("[%d][%d] = UNIF_VECT_NT_%s;\n", k, l, + non_terminals[op_list[i]->rep_states[j][k]->nt[l]]->str); + + if (op_list[i]->pcode == POP_ILV) + printf (" rep_state.cost_%s_%s_%d_%d", + tree_code_name[op_list[i]->pcode], type_name[op_list[i]->type], + op_list[i]->arity, j); + else + printf (" rep_state.cost_%s_%s_%d_%d", + tree_code_name[op_list[i]->pcode], type_name[op_list[i]->type], + op_list[i]->arity, op_list[i]->sel); + + printf ("[%d][%d] = %d;\n", k, l, + op_list[i]->rep_states[j][k]->cost[l]); + + + } + } + } + } + + printf ("}\n\n"); + +} + +void +print_trans_map () +{ + int i, j, k; + long long index; + char str[50], str1[100]; + printf ("struct {\n"); + for (j = 0; j < op_list.length (); j++) + { + if (op_list[j]->pcode == POP_ILV) + printf (" int %s_%s_%d", tree_code_name[op_list[j]->pcode], type_name[op_list[i]->type], + op_list[j]->arity); + else + printf (" int %s_%s_%d_%d", tree_code_name[op_list[j]->pcode], type_name[op_list[i]->type], + op_list[j]->arity, op_list[j]->sel); + + for (k = 0; k < op_list[j]->act_arity; k++) + { + printf ("[1024]"); + } + printf (";\n"); + } + printf ("} transition;\n\n"); + printf ("int parent_state[%d][UNIF_VECT_STATE_MAX];\n\n", op_list.length ()); + printf ("void\ninit_transition_table ()\n{\n"); + for (j = 0; j < op_list.length (); j++) + { + if (op_list[j]->pcode == POP_ILV) + 
sprintf (str, " transition.%s__%s_%d", tree_code_name[op_list[j]->pcode], type_name[op_list[i]->type], + op_list[j]->arity); + else + sprintf (str, " transition.%s_%s_%d_%d", tree_code_name[op_list[j]->pcode], type_name[op_list[i]->type], + op_list[j]->arity, op_list[j]->sel); + + for (i = 0; i < op_list[j]->index_map.length (); i++) + { + index = op_list[j]->index_map[i]; + sprintf (str1, "%s", str); + for (k = 0; k < op_list[j]->act_arity; k++) + { + sprintf (str1, "%s[%lld]", str1, (index & 0x00000000000003ff)); + index = index >> 16; + } + //printf ("%s.safe_insert (%s.length (), UNIF_VECT_STATE_%d);\n", str1, str1, op_list[j]->trans_map[i]); + printf ("%s = UNIF_VECT_STATE_%d;\n", str1, op_list[j]->trans_map[i]); + } + } + +/* for (j = 0; j < op_list.length (); j++) + { + for (i = 0; i < op_list[j]->index_map.length (); i++) + { + printf ("parent_state[%d][UNIF_VECT_STATE_%d] = UNIF_VECT_STATE_%d;\n", j, op_list[j]->trans_map[i], op_list[j]->state_map[i]); + } + }*/ + + printf ("}\n\n"); +} + +void +print_non_terminals () +{ + int i; + printf ("enum unif_vect_nt {\n"); + for (i = 0; i < non_terminals.length (); i++) + { + printf (" UNIF_VECT_NT_%s = %d,\n", non_terminals[i]->str, i); + } + printf (" UNIF_VECT_NT_MAX = %d};\n\n", i); + for (i = 0; i < non_terminals.length (); i++) + { + printf ("int get_%s_nonterminal_state ()\n{\n", non_terminals[i]->str); + printf (" return UNIF_VECT_NT_%s;\n", non_terminals[i]->str); + printf ("}\n\n"); + } + +} + +void +print_terminals () +{ + int i; + printf ("enum unif_vect_t {\n"); + for (i = 0; i < terminals.length (); i++) + { + printf (" UNIF_VECT_T_%s = %d,\n", terminals[i]->str, i); + } + printf (" UNIF_VECT_T_MAX = %d};\n\n", i); + + for (i = 0; i < terminals.length (); i++) + { + printf ("int get_%s_terminal_state ()\n{\n", terminals[i]->str); + printf (" return %d;\n", terminals[i]->state); + printf ("}\n\n"); + } + + printf ("int get_REG_terminal_state (int vector_size)\n{\n"); + printf (" switch (vector_size)\n 
{\n"); + for (i = 1; i <= floor_log2 (MAX_VECTOR_SIZE); i++) + { + printf ("case %d: return get_REG_%d_terminal_state ();\n", + (1 << i), (1 << i)); + } + printf (" default:\n gcc_assert (!\"vector size not supported\");\n"); + printf (" }\n}\n\n"); +} + +void +print_op_list () +{ + int i; + + printf ("enum unif_vect_op_list {\n"); + for (i = 0; i < op_list.length (); i++) + if (op_list[i]->pcode == POP_ILV) + printf (" UNIF_VECT_%s_%s_%d,\n", tree_code_name[op_list[i]->pcode], type_name[op_list[i]->type], + op_list[i]->arity); + else + printf (" UNIF_VECT%s_%s_%d_%d,\n", tree_code_name[op_list[i]->pcode], type_name[op_list[i]->type], + op_list[i]->arity, op_list[i]->sel); + printf (" UNIF_VECT_OP_MAX};\n\n"); + + +} + +void +print_map () +{ + int i, j, k; + int max = 0; + + printf ("enum map_op_name {\n"); + for (i = 0; i < op_list.length (); i++) + { + for (j = 0; j < op_list[i]->act_arity; j++) + { + printf ("MAP_OP_%s_%s_%d_%d,\n", tree_code_name[op_list[i]->pcode], type_name[op_list[i]->type], + op_list[i]->arity, + op_list[i]->sel == -1 ? j : op_list[i]->sel); + + if (max < op_list[i]->map[j].length ()) + max = op_list[i]->map[j].length (); + } + } + printf ("MAP_OP_NAME_MAX};\n\n"); + printf ("int rep_state_map[MAP_OP_NAME_MAX][%d];\n\n", max); + + printf ("void\ninit_rep_state_map ()\n{\n"); + for (i = 0; i < op_list.length (); i++) + { + for (j = 0; j < op_list[i]->act_arity; j++) + { + for (k = 0; k < op_list[i]->map[j].length (); k++) + if (op_list[i]->map[j][k] != NULL) + printf ("rep_state_map[MAP_OP_%s_%s_%d_%d][UNIF_VECT_STATE_%d] = %d;\n", + tree_code_name[op_list[i]->pcode], type_name[op_list[i]->type], op_list[i]->arity, + op_list[i]->sel == -1 ? 
j : op_list[i]->sel, k, + op_list[i]->map[j][k]->id); + //printf ("%d, %d, %d\n", i, j, k); + + } + } + printf ("}\n\n"); +} + +void +print_init_func () +{ + long long int num; + char *str, *str1; + int i, j; + + printf ("void\nunif_vect_init_funct ()\n{\n"); + str1 = (char *) xcalloc (20, sizeof (char)); + str = (char *) xcalloc (40, sizeof (char)); + for (i = 0; i < op_list.length (); i++) + { + num = 1; + if (op_list[i]->pcode == POP_ILV) + { + sprintf (str1, "%s_%s_%d", tree_code_name[op_list[i]->pcode], type_name[op_list[i]->type], + op_list[i]->arity); + } + else + { + sprintf (str1, "%s_%s_%d_%d", tree_code_name[op_list[i]->pcode], type_name[op_list[i]->type], + op_list[i]->arity, op_list[i]->sel); + } + + sprintf (str, "%s", str1); + for (j = 0; j < op_list[i]->act_arity; j++) + { + num = num * 1024; + } + printf (" memset (transition.%s, -1, %d * sizeof (int));\n", str1, num); + + } + printf (" memset (rep_state_map, -1, sizeof (rep_state_map));\n"); + printf (" init_transition_table ();\n"); + printf (" init_rep_state_map ();\n"); + printf (" init_state_to_rule_map (); \n"); + printf (" init_grammar_rules ();\n"); + printf (" init_nt_2_rule_map ();\n}\n\n"); +} + +void +print_state_fn_for_ilv () +{ + int i, j; + char *str = (char *) xcalloc (100, sizeof (char)); + printf ("int\ntransition_state_for_ilv"); + printf (" (int act_arity, vec state_idx)\n{\n"); + + for (i = 0; i < op_list.length (); i++) + { + if (op_list[i]->pcode == POP_ILV) + { + printf (" if (act_arity == %d)\n {\n", op_list[i]->arity); + for (j = 0; j < op_list[i]->act_arity; j++) + { + printf ("\tif (rep_state_map[MAP_OP_ILV_%s_%d_%d]", + type_name[op_list[i]->type], op_list[i]->arity, j); + printf ("[state_idx[%d]] == -1)\n", j); + printf ("\t return -1;\/\/get_REG_terminal_state ();\n"); + sprintf (str, + "%s\n\t[rep_state_map[MAP_OP_ILV_%s_%d_%d][state_idx[%d]]]", + str, type_name[op_list[i]->type], op_list[i]->arity, j, j); + } + printf (" return transition.ILV_%s_%d%s", 
type_name[op_list[i]->type], op_list[i]->arity, str); + printf (";\n }\n"); + } + } + printf ("}\n\n"); + +} + +void +print_state_fn_for_extr () +{ + int i; + + printf ("int\ntransition_state_for_extr"); + printf (" (int act_arity, int sel, int state_idx)\n{\n"); + + for (i = 0; i < op_list.length (); i++) + { + if (op_list[i]->pcode == POP_EXTR) + { + printf (" if (act_arity == %d && sel == %d)\n {\n", + op_list[i]->arity, + op_list[i]->sel); + printf (" if (rep_state_map[MAP_OP_EXTR_%s_%d_%d]", + type_name[op_list[i]->type], op_list[i]->arity, op_list[i]->sel); + printf ("[state_idx] == -1)\n"); + printf ("\treturn -1;\/\/get_REG_terminal_state ();\n"); + printf (" return transition.EXTR_%s_%d_%d\n", type_name[op_list[i]->type], op_list[i]->arity, + op_list[i]->sel); + printf ("\t [rep_state_map[MAP_OP_EXTR_%s_%d_%d][state_idx]];\n }\n", type_name[op_list[i]->type], + op_list[i]->arity, op_list[i]->sel); + } + } + printf ("}\n\n"); +} + +void +print_permute_order_fn () +{ + printf ("struct vec_perm_order_spec target_spec[] = TARGET_VEC_PERM_CONST_ORDER;\n"); + printf ("void\nprint_permute_order (int ruleno)\n{\n"); + printf (" printf (\"\\n\");\n"); + printf (" if (rules[ruleno]->spec_idx != -1)\n {\n"); + printf (" for (int i = 0; i < target_spec[rules[ruleno]->spec_idx].out_vec_size; i++)\n"); + printf ("\t{\n"); + printf ("\t printf (\"%%d \", target_spec[rules[ruleno]->spec_idx].perm_order[i]);\n"); + printf ("\t}\n"); + printf (" printf (\"\\n\");\n"); + printf (" }\n"); + printf ("}\n\n"); +} + +void +print_get_rule_no_fn () +{ + printf ("/* Each entry holds list of rules which result in the non-terminal. 
*/\n"); + printf ("vec nt_2_rule_rel[UNIF_VECT_NT_MAX];\n\n"); + printf ("void\ninit_nt_2_rule_map ()\n{"); + printf (" int i, j;\n"); + printf (" for (i = 0; i < UNIF_VECT_NT_MAX; i++)\n {\n"); + printf (" nt_2_rule_rel[i] = vNULL;"); + printf (" }\n"); + printf (" for (i = 0; i < rules.length (); i++)\n {\n"); + printf (" nt_2_rule_rel[rules[i]->lhs].safe_insert (nt_2_rule_rel[rules[i]->lhs].length (), i);\n"); + printf (" }\n"); + printf ("}\n\n"); + + printf ("int\nget_rule_number (struct primop_tree *ptree, int nt)\n{\n"); + printf (" return state_nt_to_rule_map[PT_AUX(ptree)][nt];"); + printf ("}\n\n"); +} + +void +print_get_child_nt_fn () +{ + printf ("bool is_NT2T_rule (int ruleno)\n{\n"); + printf (" return (rules[ruleno]->type == NT2T);\n"); + printf ("}\n\n"); + printf ("int\nget_child_nt (int state, int ruleno, int nt_idx)\n{\n"); + printf (" if (rules[ruleno]->type == NT2OP)\n"); + printf (" return rules[ruleno]->u.rhs_exp.opd[nt_idx];\n"); + printf (" if (rules[ruleno]->type == NT2NT)\n {\n"); + printf (" return get_child_nt (state, state_nt_to_rule_map[state][rules[ruleno]->u.non_terminal], nt_idx);\n }\n"); + printf (" if (rules[ruleno]->type == NT2T)\n"); + printf (" gcc_assert (0);\n"); + printf ("}\n\n"); +} + +void +print_transition_table () +{ + print_op_list (); + print_non_terminals (); + print_terminals (); + print_states (); + + print_grammar_rules (); + print_trans_map (); + print_map (); + print_get_rule_no_fn (); + print_get_child_nt_fn (); + print_init_func (); + print_state_fn_for_ilv (); + print_state_fn_for_extr (); + print_permute_order_fn (); +} + +int main (int argc, const char **argv) +{ + printf ("/* Generated automatically by the program `genvect-inst-tiles'\n\ +from the macro TARGET_VEC_PERM_CONST_ORDER in target header file. 
*/\n\n"); +printf ("#include \"config.h\"\n"); +printf ("#include \"system.h\"\n"); +printf ("#include \"coretypes.h\"\n"); +printf ("#include \"backend.h\"\n"); +printf ("#include \"tree.h\"\n"); +printf ("#include \"gimple.h\"\n"); +printf ("#include \"predict.h\"\n"); +printf ("#include \"tree-pass.h\"\n"); +printf ("#include \"ssa.h\"\n"); +printf ("#include \"cgraph.h\"\n"); +printf ("#include \"fold-const.h\"\n"); +printf ("#include \"stor-layout.h\"\n"); +printf ("#include \"gimple-iterator.h\"\n"); +printf ("#include \"gimple-walk.h\"\n"); +printf ("#include \"tree-ssa-loop-manip.h\"\n"); +printf ("#include \"tree-cfg.h\"\n"); +printf ("#include \"cfgloop.h\"\n"); +printf ("#include \"tree-vectorizer.h\"\n"); +printf ("#include \"tree-ssa-propagate.h\"\n"); +printf ("#include \"dbgcnt.h\"\n"); +printf ("#include \"tree-scalar-evolution.h\"\n"); +printf ("#include \"tree-vect-unified.h\"\n"); +printf ("#include \"tree-pretty-print.h\"\n"); +printf ("#include \"gimple-pretty-print.h\"\n"); +printf ("#include \"target.h\"\n"); +printf ("#include \"rtl.h\"\n"); +printf ("#include \"tm_p.h\"\n"); +printf ("#include \"optabs-tree.h\"\n"); +printf ("#include \"dumpfile.h\"\n"); +printf ("#include \"alias.h\"\n"); +printf ("#include \"tree-eh.h\"\n"); +printf ("#include \"gimplify.h\"\n"); +printf ("#include \"gimplify-me.h\"\n"); +printf ("#include \"tree-ssa-loop-ivopts.h\"\n"); +printf ("#include \"tree-ssa-loop.h\"\n"); +printf ("#include \"expr.h\"\n"); +printf ("#include \"builtins.h\"\n"); +printf ("#include \"params.h\"\n"); +printf ("#include \"pretty-print.h\"\n"); +printf ("\n"); + create_instruction_tiles (); + print_instruction_tiles (); + create_grammar_rules (); + print_grammar_rules_in_comment (); + create_transition_table (); + print_transition_table (); +} + diff --git a/gcc/tree-vect-unified-common.c b/gcc/tree-vect-unified-common.c index 90ca6df7c575..a31ed8ae4d33 100644 --- a/gcc/tree-vect-unified-common.c +++ b/gcc/tree-vect-unified-common.c 
@@ -61,7 +61,6 @@ along with GCC; see the file COPYING3. If not see #include "builtins.h" #include "params.h" #include "pretty-print.h" -#include "pretty-print.h" #else #include "errors.h" #include "machmode.h" @@ -78,9 +77,6 @@ along with GCC; see the file COPYING3. If not see #include "is-a.h" #include "target.h" #include "tree-core.h" -#include "tree-vect-unified.h" - - #endif #include "tree-vect-unified.h" @@ -128,7 +124,7 @@ init_primop_node (void) struct primop_tree * populate_prim_node (enum primop_code pcode, tree iter_count, - struct primop_tree *parent, gimple *stmt) + struct primop_tree *parent, gimple *stmt, tree vec_type) { struct primop_tree *ptree; ptree = init_primop_node (); @@ -136,7 +132,7 @@ populate_prim_node (enum primop_code pcode, tree iter_count, PT_NODE_OP (ptree) = (int) pcode; PT_PARENT (ptree) = parent; PT_ITER_COUNT (ptree) = iter_count; - + PT_VEC_TYPE (ptree) = vec_type; #ifndef GENERATOR_FILE if (stmt) { @@ -154,11 +150,12 @@ populate_prim_node (enum primop_code pcode, tree iter_count, which primtree is being created. 
*/ struct primop_tree * create_primTree_combine (enum primop_code pcode, gimple *stmt, int parts, - tree iter_count, struct primop_tree *parent) + tree iter_count, struct primop_tree *parent, + tree vec_type) { struct primop_tree * ptree; - ptree = populate_prim_node (pcode, iter_count, parent, stmt); + ptree = populate_prim_node (pcode, iter_count, parent, stmt, vec_type); PT_OPERAND_SELECTOR (ptree) = -1; PT_DIVISION (ptree) = parts; PT_VAR_STRIDE (ptree) = NULL; @@ -183,11 +180,11 @@ create_primTree_combine (enum primop_code pcode, gimple *stmt, int parts, struct primop_tree * create_primTree_partition (enum primop_code pcode, gimple *stmt, int parts, int selector, tree iter_count, - struct primop_tree *parent) + struct primop_tree *parent, tree vec_type) { struct primop_tree * ptree; - ptree = populate_prim_node (pcode, iter_count, parent, stmt); + ptree = populate_prim_node (pcode, iter_count, parent, stmt, vec_type); PT_OPERAND_SELECTOR (ptree) = selector; PT_DIVISION (ptree) = parts; PT_VAR_STRIDE (ptree) = NULL; diff --git a/gcc/tree-vect-unified-opts.c b/gcc/tree-vect-unified-opts.c index 59922ecee2fa..cc25233b31b5 100644 --- a/gcc/tree-vect-unified-opts.c +++ b/gcc/tree-vect-unified-opts.c @@ -61,7 +61,6 @@ along with GCC; see the file COPYING3. If not see #include "builtins.h" #include "params.h" #include "pretty-print.h" -#include "pretty-print.h" #else # include "errors.h" #include "machmode.h" @@ -78,8 +77,6 @@ along with GCC; see the file COPYING3. 
If not see #include "is-a.h" #include "target.h" #include "tree-core.h" -#include "tree-vect-unified.h" - #endif #include "tree-vect-unified.h" @@ -116,12 +113,13 @@ ILV_arity_reduction (struct primop_tree *root, int from_arity, int to_arity) #endif new_root = create_primTree_combine (POP_ILV, NULL, to_arity, - new_iter_count, PT_PARENT (root)); + new_iter_count, PT_PARENT (root), PT_VEC_TYPE (root)); for (i = 0; i < to_arity; i++) { new_child = create_primTree_combine (POP_ILV, NULL, - from_arity / to_arity, PT_ITER_COUNT (root), new_root); + from_arity / to_arity, PT_ITER_COUNT (root), new_root, + PT_VEC_TYPE (root)); for (j = 0; j < from_arity / to_arity; j++) { @@ -173,11 +171,11 @@ EXTR_arity_reduction (struct primop_tree *root, int from_arity, int to_arity) new_root = create_primTree_partition (POP_EXTR, NULL, to_arity, (PT_OPERAND_SELECTOR (root) * to_arity / from_arity) % to_arity, - new_iter_count, PT_PARENT (root)); + new_iter_count, PT_PARENT (root), PT_VEC_TYPE (root)); new_child = create_primTree_partition (POP_EXTR, NULL, from_arity/to_arity, PT_OPERAND_SELECTOR (root) % (from_arity / to_arity), - PT_ITER_COUNT (root), new_root); + PT_ITER_COUNT (root), new_root, PT_VEC_TYPE (root)); add_child_at_index (new_child, PT_CHILD (root, 0), 0); @@ -258,14 +256,14 @@ ILV_arity_promotion (struct primop_tree *root, int from_arity, int to_arity) #endif new_root = create_primTree_combine (POP_ILV, NULL, to_arity, - new_iter_count, PT_PARENT (root)); + new_iter_count, PT_PARENT (root), PT_VEC_TYPE (root)); for (i = 0; i < to_arity / from_arity; i++) { for (j = 0; j < from_arity; j++) { new_child = create_primTree_partition (POP_EXTR, NULL, to_arity / from_arity, i, PT_ITER_COUNT (root), - new_root); + new_root, PT_VEC_TYPE (root)); add_child_at_index (new_child, PT_CHILD (root, j), 0); tmp = k_arity_promotion_reduction (new_child, to_arity); if (tmp != NULL) @@ -313,7 +311,8 @@ merge_EXTR_nodes (struct primop_tree *root, int to_arity) if (iter_node != PT_CHILD 
(root, 0)) { tmp = create_primTree_partition (POP_EXTR, NULL, parts, selector, - iter_count, PT_PARENT (root)); + iter_count, PT_PARENT (root), + PT_VEC_TYPE (root)); add_child_at_index (tmp, iter_node, 0); return tmp; } @@ -520,8 +519,8 @@ int annotate_tree_nodes (struct primop_tree *ptree, int *end, struct primop_tree **leaves) { - int key; - long long int idx; + long long int key; + int idx; int length = 0; int i, j; struct primop_tree *temp[150]; @@ -541,8 +540,8 @@ annotate_tree_nodes (struct primop_tree *ptree, int *end, for (i = 0; i < ptree->children.length (); i++) { - key = (key | annotate_tree_nodes (PT_CHILD (ptree, i), - &length, temp)) << 12; + key = (key << 12) + | annotate_tree_nodes (PT_CHILD (ptree, i), &length, temp); for (j = 0; j < length; j++) { leaves[(*end)++] = temp[j]; @@ -575,7 +574,11 @@ unity_redundancy_elimination_2 (struct primop_tree *ptree, bool changed = false; int to_be_matched; struct primop_tree *temp_ptree; - int i; + int i, j; + long long int key; + int idx, end; + struct primop_tree *leaves[150]; + struct primtree_hash_table *new_hash; *new_ptree = ptree; @@ -612,14 +615,53 @@ unity_redundancy_elimination_2 (struct primop_tree *ptree, } } + key = PT_NODE_OP (ptree) << 10; + end = 0; for (i = 0; i < (*new_ptree)->children.length (); i++) { changed |= unity_redundancy_elimination_2 (PT_CHILD (*new_ptree, i), &temp_ptree); PT_CHILD (*new_ptree, i) = temp_ptree; PT_PARENT (temp_ptree) = *new_ptree; + if (PT_NODE_OP (temp_ptree) == POP_MEMREF + || PT_NODE_OP (temp_ptree) == POP_PH + || PT_NODE_OP (temp_ptree) == POP_CONST) + { + key = (key | 0xfff) << 12; + leaves[end++] = temp_ptree; + } + else + { + key = (key << 12) | PT_AUX(temp_ptree); + + for (j = 0; + j < primop_hash[PT_AUX(temp_ptree)]->leaves.length (); + j++) + { + leaves[end + j] = primop_hash[PT_AUX(temp_ptree)]->leaves[j]; + } + + end = end + j; + } + } + idx = lookup_key_in_table (key, leaves, end); + + if (idx == -1) + { + // Create new entry. 
+ new_hash = (struct primtree_hash_table *) xcalloc (1, + sizeof (struct primtree_hash_table)); + new_hash->key = key; + new_hash->leaves = vNULL; + for (i = 0; i < end; i++) + new_hash->leaves.safe_insert (new_hash->leaves.length (), leaves[i]); + idx = primop_hash.length (); + primop_hash.safe_insert (idx, new_hash); + } + PT_AUX (*new_ptree) = idx; + return changed; } @@ -640,13 +682,12 @@ unity_redundancy_elimination (struct primop_tree *ptree) bool changed; annotate_tree_nodes (ptree, &end, dummy); - changed = false; do { changed = unity_redundancy_elimination_2 (ptree, &new_ptree); - if (ptree == new_ptree) - break; + //if (ptree == new_ptree) + // break; ptree = new_ptree; } while (changed == true); diff --git a/gcc/tree-vect-unified.c b/gcc/tree-vect-unified.c index 8e475f2992c1..03e26254fd4a 100644 --- a/gcc/tree-vect-unified.c +++ b/gcc/tree-vect-unified.c @@ -1559,16 +1559,17 @@ exists_primTree_with_memref (tree base, tree step, bool is_read, struct primop_tree * create_primTree_memref (tree base, tree step, bool is_read, int num, - tree iter_count, struct primop_tree *parent) + tree iter_count, struct primop_tree *parent, + tree vec_type) { struct primop_tree * ptree; - ptree = populate_prim_node (POP_MEMREF, iter_count, parent, NULL); + ptree = populate_prim_node (POP_MEMREF, iter_count, parent, NULL, vec_type); PT_MEMVAL_BASE (ptree) = unshare_expr (base); PT_MEMVAL_MULT_IDX (ptree) = unshare_expr (step); PT_MEMVAL_IS_READ (ptree) = is_read; - + PT_VEC_TYPE (ptree) = vec_type; if (dump_enabled_p ()) { dump_printf_loc (MSG_NOTE, vect_location, @@ -1726,18 +1727,19 @@ vectorizable_store (gimple *stmt, struct ITER_node *inode, if (pnode == NULL) { pnode = create_primTree_memref (base, step, false, num, - ITER_NODE_NITERS (inode), NULL); + ITER_NODE_NITERS (inode), NULL, vec_type); ITER_NODE_LOOP_BODY (inode).safe_insert ( ITER_NODE_LOOP_BODY (inode).length (), pnode); pchild1 = create_primTree_combine (POP_ILV, stmt, tree_to_uhwi (step) / num, 
ITER_NODE_NITERS (inode), - pnode); + pnode, vec_type); add_child_at_index (pnode, pchild1, 0); } else { pchild1 = get_child_at_index (pnode, 0); + gcc_assert (PT_VEC_TYPE (pchild1) == vec_type); } if (def_stmt) { @@ -1765,7 +1767,7 @@ vectorizable_store (gimple *stmt, struct ITER_node *inode, } add_child_at_index (pchild1, pchild2, tree_to_uhwi (offset) / num); - + gcc_assert (PT_VEC_TYPE (pchild2)); return pnode; } @@ -1843,9 +1845,9 @@ vectorizable_load (gimple *stmt, struct ITER_node *inode, pnode = create_primTree_partition (POP_EXTR, stmt, tree_to_uhwi (step) / num, - tree_to_uhwi (offset) / num, ITER_NODE_NITERS (inode), parent); + tree_to_uhwi (offset) / num, ITER_NODE_NITERS (inode), parent, vec_type); pchild1 = create_primTree_memref (base, step, true, num, - ITER_NODE_NITERS (inode), pnode); + ITER_NODE_NITERS (inode), pnode, vec_type); add_child_at_index (pnode, pchild1, 0); return pnode; } @@ -2390,6 +2392,158 @@ dump_iter_node (struct ITER_node *inode, FILE *fp) pp_flush (&pp); } +static void +reset_aux_field (struct primop_tree *ptree) +{ + int i; + PT_AUX (ptree) = -1; + if (PT_ARITY (ptree) == 0) + return; + + for (i = 0; i < ptree->children.length (); i++) + reset_aux_field (get_child_at_index (ptree, i)); +} + +static int +get_transition_state (struct primop_tree *ptree) +{ + int i; + vec idx = vNULL; + + /* If the node is non-permute operation, return the state of terminal 'REG' as + state of this tree, because non-permute operations are evaluated in + registers. */ + if (PT_NODE_OP (ptree) < MAX_TREE_CODES) + { + return get_REG_terminal_state (GET_MODE_SIZE (TYPE_MODE (PT_VEC_TYPE (ptree)))); + } + + /* We need not handle POP_PH as it is only for tile construction. POP_CONCAT + and POP_SPLT are now represented using POP_ILV and POP_EXTR for now. Hence + these operators need not be handled here. POP_MEMREF and POP_CONST are + leaf nodes, and won't be passed to this function. 
POP_INV for loop
+     invariants, POP_COLLAPSE for reduction operation and POP_ITER for loop or
+     vec_size_reduction operation need TODO. */
+
+  switch (PT_NODE_OP (ptree))
+    {
+      case POP_ILV:
+        for (i = 0; i < ptree->children.length (); i++)
+          {
+            idx.safe_insert(idx.length (),
+                            PT_AUX (get_child_at_index (ptree, i)));
+          }
+
+        return transition_state_for_ilv (PT_DIVISION (ptree), idx);
+
+      case POP_EXTR:
+        return transition_state_for_extr (PT_DIVISION (ptree),
+                                          PT_OPERAND_SELECTOR (ptree),
+                                          PT_AUX (get_child_at_index (ptree, 0)));
+
+      default:
+        gcc_assert (!"Operator not handled.");
+    }
+  return -1;
+}
+
+/* Label PTREE bottom-up, storing the state of every node in PT_AUX.  A leaf
+   gets the REG terminal state (POP_MEMREF) or the CONST terminal state
+   (POP_CONST).  An inner node gets the state of its transition, or the REG
+   terminal state when get_transition_state yields -1.  Each step is traced
+   on stdout.  Returns false as soon as a child fails, true otherwise.  */
+static bool
+label_permute_tree (struct primop_tree *ptree)
+{
+  int i;
+
+  if (PT_ARITY (ptree) == 0)
+    {
+      switch (PT_NODE_OP (ptree))
+        {
+          case POP_MEMREF:
+            PT_AUX (ptree) = get_REG_terminal_state (GET_MODE_SIZE (TYPE_MODE (PT_VEC_TYPE (ptree))));
+            printf ("tree : %d >> state : %d\n", PT_PID (ptree), PT_AUX (ptree));
+            break;
+          case POP_CONST:
+            PT_AUX (ptree) = get_CONST_terminal_state ();
+            printf ("tree : %d >> state : %d\n", PT_PID (ptree), PT_AUX (ptree));
+            break;
+          default:
+            gcc_assert (0);
+        }
+      return true;
+    }
+
+  /* Children first: the state of this node is derived from theirs.  */
+  for (i = 0; i < ptree->children.length (); i++)
+    if (!label_permute_tree (get_child_at_index (ptree, i)))
+      return false;
+
+  if (PT_NODE_OP (ptree) == POP_MEMREF)
+    PT_AUX (ptree) = PT_AUX (get_child_at_index (ptree, 0));
+  else
+    PT_AUX (ptree) = get_transition_state (ptree);
+  printf ("tree : %d >> state : %d\n", PT_PID (ptree), PT_AUX (ptree));
+
+  if (PT_AUX (ptree) == -1)
+    {
+      printf ("\n labeled to REG\n");
+      PT_AUX (ptree) = (get_REG_terminal_state (GET_MODE_SIZE (TYPE_MODE (PT_VEC_TYPE (ptree)))));
+    }
+  else
+    printf ("%d\t", PT_AUX (ptree));
+
+  return true;
+}
+
+/* Reduce PTREE against the goal non-terminal GOAL_NT.  The rule reported by
+   get_rule_number is traced on stdout; for a rule that is neither the
+   default (-1) nor a terminal rule, every child is reduced against the
+   non-terminal get_child_nt reports for it.  A POP_MEMREF node that has
+   children is reduced through its first child only.  Always returns true.  */
+static bool
+reduce_permute_tree (struct primop_tree *ptree, int goal_nt)
+{
+  int rule_no;
+  int i;
+
+  rule_no = get_rule_number (ptree, goal_nt);
+  if (rule_no == -1) {
+    printf ("\n Matched to default rule : %d\n", PT_PID (ptree));
+  } else if (is_NT2T_rule (rule_no)) {
+    printf ("Terminal matched.\n");
+  } else {
+    printf ("\n Rule matched: %d.\t State matched: %d.\n", rule_no, PT_AUX (ptree));
+    print_permute_order (rule_no);
+    if (PT_ARITY (ptree) != 0 && PT_NODE_OP (ptree) == POP_MEMREF)
+      return reduce_permute_tree (PT_CHILD (ptree, 0), goal_nt);
+    for (i = 0; i < PT_ARITY (ptree); i++)
+      reduce_permute_tree (PT_CHILD (ptree, i), get_child_nt (PT_AUX (ptree), rule_no, i));
+  }
+
+  return true;
+}
+
+/* Label and then reduce every tree in the loop body of INODE.  Returns false
+   at the first tree that fails, true otherwise (also for an empty body).  */
+static bool
+unified_perm_tree_code_generation (struct ITER_node *inode)
+{
+  int i;
+
+  for (i = 0; i < (ITER_NODE_LOOP_BODY (inode)).length (); i++)
+    {
+      struct primop_tree *tmp_tree = (ITER_NODE_LOOP_BODY (inode))[i];
+      reset_aux_field (tmp_tree);
+      if (!label_permute_tree (tmp_tree)
+          || !reduce_permute_tree (tmp_tree, get_REG_terminal_state (GET_MODE_SIZE (TYPE_MODE (PT_VEC_TYPE (tmp_tree))))))
+        return false;
+    }
+
+  return true;
+}
+
 /* Function vectorize_loops_using_uniop.
Entry point to autovectorization using unified representation: @@ -2420,6 +2574,7 @@ vectorize_loops_using_uniop (void) //iter_node = NULL; init_stmt_attr_vec (); + unif_vect_init_funct (); FOR_EACH_LOOP (loop, 0) if (loop->dont_vectorize) @@ -2527,6 +2682,35 @@ vectorize_loops_using_uniop (void) dump_iter_node (tmp_iter_node, alt_dump_file); } + worklist = vNULL; + worklist = (ITER_NODE_LOOP_BODY (tmp_iter_node)).copy (); + for (i = 0; i < worklist.length (); i++) + { + gcc_assert (worklist.iterate (i, &tmp_tree)); + tmp_tree = unity_redundancy_elimination (tmp_tree); + + ITER_NODE_LOOP_BODY (tmp_iter_node)[i] = tmp_tree; + } + + if (dump_enabled_p ()) + { + dump_printf (MSG_NOTE, "\nUnity redundancy elimination applied.\n"); + if (dump_file) + dump_iter_node (tmp_iter_node, dump_file); + if (alt_dump_file) + dump_iter_node (tmp_iter_node, alt_dump_file); + } + + //unified_vecsize_reduction (tmp_iter_node); + + if (dump_enabled_p ()) + { + dump_printf (MSG_NOTE, "\nVector size reduction applied.\n"); + if (dump_file) + dump_iter_node (tmp_iter_node, dump_file); + if (alt_dump_file) + dump_iter_node (tmp_iter_node, alt_dump_file); + } gimple *loop_vectorized_call = vect_loop_vectorized_call (loop); /* If the loop is vectorized, set uid of stmts within scalar loop to 0. This change is needed if transform phase uses this loop info. */ @@ -2535,6 +2719,8 @@ vectorize_loops_using_uniop (void) /* TODO: Insert call to transformation entry point. */ + unified_perm_tree_code_generation (tmp_iter_node); + num_vectorized_loops++; /* Now that the loop has been vectorized, allow it to be unrolled etc. 
*/ diff --git a/gcc/tree-vect-unified.h b/gcc/tree-vect-unified.h index 9991ede926ae..c1523ca1ad5b 100644 --- a/gcc/tree-vect-unified.h +++ b/gcc/tree-vect-unified.h @@ -224,6 +224,7 @@ struct primop_tree { } phval; } u; int aux; +// vec aux1; }; #define PT_PID(x) (x)->pid @@ -250,6 +251,7 @@ struct primop_tree { #define PT_MEMVAL_MULT_IDX(x) (x)->u.memval.mult_idx #define PT_MEMVAL_IS_READ(x) (x)->u.memval.is_read #define PT_AUX(x) (x)->aux +//#define PT_AUX1(x) (x)->aux1 #define PT_PH_IDX(x) (x)->u.phval.index #define PT_PH_TYPE(x) (x)->u.phval.type //struct ITER_node *iter_node; @@ -285,6 +287,51 @@ static const char *const tree_code_name[] = { #undef DEFTREECODE #undef END_OF_BASE_TREE_CODES +/* TARGET_VEC_PERM_CONST_ORDER - If defined, this target hook points to an array + of "struct vec_perm_order_spec" specifying various permute orders supported + by the target architecture. */ + +struct vec_perm_order_spec +{ + /* Number of operands in permute order specified. */ + int num_opd; + + /* Vector size of input permute order. */ + int in_vec_size; + + /* Vector size of resultant permute order. It can be identified from the size + of perm_order, however, if by mistake, this field is not defined properly, + can lead to errors. Hence, taking that as input. */ + int out_vec_size; + + /* Input type name. */ + char *type; + + /* Permute order of operands. */ + int *perm_order; + + /* Cost of permute operation. */ + int cost; + + /* Name of permute operation for debugging purpose. */ + char *op_name; + + /* The constraints on input and output operands of this instruction. + Restricting these to R,M or I for register, memory and integer constant + respectively. This is needed for reduction rules to be generated for BURS + tree. It should have comma separated list - with num_opd + 1 listings. */ + char * opd_constraint; + + /* Condition under which the instruction can be emitted. Thinking of + something like condition part in define_insn. 
*/ + char * cond; + + /* PRIMOP_TREE constructed after tile construction. */ + struct primop_tree *ptree; +}; + + + extern unsigned int vectorize_loops_using_uniop (void); extern struct primop_tree * analyze_and_create_ptree (struct primop_tree *, gimple *, struct ITER_node *); @@ -295,16 +342,16 @@ extern struct primop_tree * k_arity_promotion_reduction (struct primop_tree *, int); extern struct primop_tree * init_primop_node (void); extern struct primop_tree * populate_prim_node (enum primop_code, tree, - struct primop_tree *, gimple *); + struct primop_tree *, gimple *, tree); extern struct primop_tree * exists_primTree_with_memref (tree, tree, bool, struct ITER_node *); extern struct primop_tree * create_primTree_memref (tree, tree, bool, int, tree, - struct primop_tree *); + struct primop_tree *, tree); extern struct primop_tree * create_primTree_combine (enum primop_code, gimple *, - int, tree, struct primop_tree *); + int, tree, struct primop_tree *, tree); extern struct primop_tree * create_primTree_partition (enum primop_code, gimple *, int, int, tree, - struct primop_tree *); + struct primop_tree *, tree); extern void add_child_at_index (struct primop_tree *, struct primop_tree *, int); extern struct primop_tree * get_child_at_index (struct primop_tree *, int); @@ -316,6 +363,16 @@ extern void free_stmt_attr_vec (void); extern inline void set_stmt_attr (gimple *, struct stmt_attr *); extern inline struct stmt_attr *get_stmt_attr (gimple *); extern struct primop_tree * unity_redundancy_elimination (struct primop_tree *); - +extern void unif_vect_init_funct (void); +extern vec transition_state_for_extr (int, int, int); +extern vec transition_state_for_ilv (int, vec); +extern int get_REG_terminal_state (int); +extern bool is_NT2T_rule (int); +extern int get_CONST_terminal_state (); +extern int get_MEM_terminal_state (); +//extern int get_goal_nonterminal_state (int); +extern int get_rule_number (struct primop_tree *, int); +extern void print_permute_order 
(int); +extern int get_child_nt (int, int, int); #endif diff --git a/gcc/vec.h b/gcc/vec.h index fee46164b010..2cb1f74c65cd 100644 --- a/gcc/vec.h +++ b/gcc/vec.h @@ -486,7 +486,7 @@ public: static size_t embedded_size (unsigned); void embedded_init (unsigned, unsigned = 0, unsigned = 0); void quick_grow (unsigned len); - void quick_grow_cleared (unsigned len); + void quick_grow_cleared (unsigned len, int = 0); /* vec class can access our internal data and functions. */ template friend struct vec; @@ -1089,13 +1089,13 @@ vec::quick_grow (unsigned len) template inline void -vec::quick_grow_cleared (unsigned len) +vec::quick_grow_cleared (unsigned len, int val) { unsigned oldlen = length (); size_t sz = sizeof (T) * (len - oldlen); quick_grow (len); if (sz != 0) - memset (&(address ()[oldlen]), 0, sz); + memset (&(address ()[oldlen]), val, sz); } @@ -1235,9 +1235,9 @@ public: T &pop (void); void truncate (unsigned); void safe_grow (unsigned CXX_MEM_STAT_INFO); - void safe_grow_cleared (unsigned CXX_MEM_STAT_INFO); + void safe_grow_cleared (unsigned CXX_MEM_STAT_INFO, int = 0); void quick_grow (unsigned); - void quick_grow_cleared (unsigned); + void quick_grow_cleared (unsigned, int = 0); void quick_insert (unsigned, const T &); void safe_insert (unsigned, const T & CXX_MEM_STAT_INFO); void ordered_remove (unsigned); @@ -1604,13 +1604,13 @@ vec::safe_grow (unsigned len MEM_STAT_DECL) template inline void -vec::safe_grow_cleared (unsigned len MEM_STAT_DECL) +vec::safe_grow_cleared (unsigned len MEM_STAT_DECL, int val) { unsigned oldlen = length (); size_t sz = sizeof (T) * (len - oldlen); safe_grow (len PASS_MEM_STAT); if (sz != 0) - memset (&(address ()[oldlen]), 0, sz); + memset (&(address ()[oldlen]), val, sz); } @@ -1632,10 +1632,10 @@ vec::quick_grow (unsigned len) template inline void -vec::quick_grow_cleared (unsigned len) +vec::quick_grow_cleared (unsigned len, int val) { gcc_checking_assert (m_vec); - m_vec->quick_grow_cleared (len); + 
m_vec->quick_grow_cleared (len, val); }