diff --git a/gcc/ira-color.c b/gcc/ira-color.c
index 1487afc5ef16..36f3f4d70f36 100644
--- a/gcc/ira-color.c
+++ b/gcc/ira-color.c
@@ -36,6 +36,11 @@ along with GCC; see the file COPYING3.  If not see
 #include "reload.h"
 #include "cfgloop.h"
 
+/* To prevent soft conflict detection becoming quadratic in the
+   loop depth.  Only for very pathological cases, so it hardly
+   seems worth a --param.  */
+const int max_soft_conflict_loop_depth = 64;
+
 typedef struct allocno_hard_regs *allocno_hard_regs_t;
 
 /* The structure contains information about hard registers can be
@@ -1707,6 +1712,167 @@ calculate_saved_nregs (int hard_regno, machine_mode mode)
   return nregs;
 }
 
+/* Allocnos A1 and A2 are known to conflict.  Check whether, in some loop L
+   that is either the current loop or a nested subloop, the conflict is of
+   the following form:
+
+   - One allocno (X) is a cap allocno for some non-cap allocno X2.
+
+   - X2 belongs to some loop L2.
+
+   - The other allocno (Y) is a non-cap allocno.
+
+   - Y is an ancestor of some allocno Y2 in L2.  (Note that such a Y2
+     must exist, given that X and Y conflict.)
+
+   - Y2 is not referenced in L2 (that is, ALLOCNO_NREFS (Y2) == 0).
+
+   - Y can use a different allocation from Y2.
+
+   In this case, Y's register is live across L2 but is not used within it,
+   whereas X's register is used only within L2.  The conflict is therefore
+   only "soft", in that it can easily be avoided by spilling Y2 inside L2
+   without affecting any insn references.
+
+   If the conflict does have this form, return the Y2 that would need to be
+   spilled in order to allow X and Y (and thus A1 and A2) to use the same
+   register.  Return null otherwise.  Returning null is conservatively correct;
+   any nonnull return value is an optimization.  */
+ira_allocno_t
+ira_soft_conflict (ira_allocno_t a1, ira_allocno_t a2)
+{
+  /* Search for the loop L and its associated allocnos X and Y.  */
+  int search_depth = 0;
+  while (ALLOCNO_CAP_MEMBER (a1) && ALLOCNO_CAP_MEMBER (a2))
+    {
+      a1 = ALLOCNO_CAP_MEMBER (a1);
+      a2 = ALLOCNO_CAP_MEMBER (a2);
+      if (search_depth++ > max_soft_conflict_loop_depth)
+	return nullptr;
+    }
+  /* This must be true if A1 and A2 conflict.  */
+  ira_assert (ALLOCNO_LOOP_TREE_NODE (a1) == ALLOCNO_LOOP_TREE_NODE (a2));
+
+  /* Make A1 the cap allocno (X in the comment above) and A2 the
+     non-cap allocno (Y in the comment above).  */
+  if (ALLOCNO_CAP_MEMBER (a2))
+    std::swap (a1, a2);
+  if (!ALLOCNO_CAP_MEMBER (a1))
+    return nullptr;
+
+  /* Search for the real allocno that A1 caps (X2 in the comment above).  */
+  do
+    {
+      a1 = ALLOCNO_CAP_MEMBER (a1);
+      if (search_depth++ > max_soft_conflict_loop_depth)
+	return nullptr;
+    }
+  while (ALLOCNO_CAP_MEMBER (a1));
+
+  /* Find the associated allocno for A2 (Y2 in the comment above).  */
+  auto node = ALLOCNO_LOOP_TREE_NODE (a1);
+  auto local_a2 = node->regno_allocno_map[ALLOCNO_REGNO (a2)];
+
+  /* Find the parent of LOCAL_A2/Y2.  LOCAL_A2 must be a descendant of A2
+     for the conflict query to make sense, so this parent lookup must succeed.
+
+     If the parent allocno has no references, it is usually cheaper to
+     spill at that loop level instead.  Keep searching until we find
+     a parent allocno that does have references (but don't look past
+     the starting allocno).  */
+  ira_allocno_t local_parent_a2;
+  for (;;)
+    {
+      local_parent_a2 = ira_parent_allocno (local_a2);
+      if (local_parent_a2 == a2 || ALLOCNO_NREFS (local_parent_a2) != 0)
+	break;
+      local_a2 = local_parent_a2;
+    }
+  if (CHECKING_P)
+    {
+      /* Sanity check to make sure that the conflict we've been given
+	 makes sense.  */
+      auto test_a2 = local_parent_a2;
+      while (test_a2 != a2)
+	{
+	  test_a2 = ira_parent_allocno (test_a2);
+	  ira_assert (test_a2);
+	}
+    }
+  if (local_a2
+      && ALLOCNO_NREFS (local_a2) == 0
+      && ira_subloop_allocnos_can_differ_p (local_parent_a2))
+    return local_a2;
+  return nullptr;
+}
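
[Reviewer's note (illustration only, not part of the patch): the shape that
ira_soft_conflict detects corresponds to source code like the sketch below,
where `sum' (Y) is live across a conditional inner loop (L2) that never
references it, while the inner loop's own values (X2) are used only there.
The function `g' and the exact code shape are hypothetical.

    extern void g (int);

    int
    f (int *a, int n, int m)
    {
      int sum = 0;                       /* Y: live across the inner loop.  */
      for (int i = 0; i < n; i++)
        {
          sum += a[i];
          if (m)
            for (int j = 0; j < m; j++)  /* L2 */
              g (j);                     /* j (X2) is referenced only here.  */
        }
      return sum;
    }

Inside L2, the subloop allocno for `sum' (Y2) has ALLOCNO_NREFS == 0, so the
conflict between `j' and `sum' is soft: both can share a hard register if
`sum' is spilled to memory inside L2 only.]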
+
+/* The caller has decided to allocate HREGNO to A and has proved that
+   this is safe.  However, the allocation might require the kind of
+   spilling described in the comment above ira_soft_conflict.
+   The caller has recorded that:
+
+   - The allocnos in ALLOCNOS_TO_SPILL are the ones that would need
+     to be spilled to satisfy soft conflicts for at least one allocation
+     (not necessarily HREGNO).
+
+   - The soft conflicts apply only to A allocations that overlap
+     SOFT_CONFLICT_REGS.
+
+   If allocating HREGNO is subject to any soft conflicts, record the
+   subloop allocnos that need to be spilled.  */
+static void
+spill_soft_conflicts (ira_allocno_t a, bitmap allocnos_to_spill,
+		      HARD_REG_SET soft_conflict_regs, int hregno)
+{
+  auto nregs = hard_regno_nregs (hregno, ALLOCNO_MODE (a));
+  bitmap_iterator bi;
+  unsigned int i;
+  EXECUTE_IF_SET_IN_BITMAP (allocnos_to_spill, 0, i, bi)
+    {
+      /* SPILL_A needs to be spilled for at least one allocation
+	 (not necessarily this one).  */
+      auto spill_a = ira_allocnos[i];
+
+      /* Find the corresponding allocno for this loop.  */
+      auto conflict_a = spill_a;
+      do
+	{
+	  conflict_a = ira_parent_or_cap_allocno (conflict_a);
+	  ira_assert (conflict_a);
+	}
+      while (ALLOCNO_LOOP_TREE_NODE (conflict_a)->level
+	     > ALLOCNO_LOOP_TREE_NODE (a)->level);
+
+      ira_assert (ALLOCNO_LOOP_TREE_NODE (conflict_a)
+		  == ALLOCNO_LOOP_TREE_NODE (a));
+
+      if (conflict_a == a)
+	{
+	  /* SPILL_A is a descendant of A.  We don't know (and don't need
+	     to know) which cap allocnos have a soft conflict with A.
+	     All we need to do is test whether the soft conflict applies
+	     to the chosen allocation.  */
+	  if (ira_hard_reg_set_intersection_p (hregno, ALLOCNO_MODE (a),
+					       soft_conflict_regs))
+	    ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P (spill_a) = true;
+	}
+      else
+	{
+	  /* SPILL_A is a descendant of CONFLICT_A, which has a soft conflict
+	     with A.  Test whether the soft conflict applies to the current
+	     allocation.  */
+	  ira_assert (ira_soft_conflict (a, conflict_a) == spill_a);
+	  auto conflict_hregno = ALLOCNO_HARD_REGNO (conflict_a);
+	  ira_assert (conflict_hregno >= 0);
+	  auto conflict_nregs = hard_regno_nregs (conflict_hregno,
+						  ALLOCNO_MODE (conflict_a));
+	  if (hregno + nregs > conflict_hregno
+	      && conflict_hregno + conflict_nregs > hregno)
+	    ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P (spill_a) = true;
+	}
+    }
+}
+
 /* Choose a hard register for allocno A.  If RETRY_P is TRUE, it
    means that the function called from function
    `ira_reassign_conflict_allocnos' and `allocno_reload_assign'.  In
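
[Reviewer's note (illustration only, not part of the patch): the final range
test in spill_soft_conflicts is the usual half-open interval intersection
check.  A self-contained sketch of the same predicate, with a hypothetical
name:

    /* Return true if the spans [R1, R1 + N1) and [R2, R2 + N2) share
       at least one hard register.  */
    static bool
    hard_reg_ranges_overlap_p (int r1, int n1, int r2, int n2)
    {
      return r1 + n1 > r2 && r2 + n2 > r1;
    }

Only when the chosen span overlaps CONFLICT_A's span does SPILL_A really
need to be spilled for this particular allocation.]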
@@ -1746,6 +1912,8 @@ assign_hard_reg (ira_allocno_t a, bool retry_p)
 #ifdef STACK_REGS
   bool no_stack_reg_p;
 #endif
+  auto_bitmap allocnos_to_spill;
+  HARD_REG_SET soft_conflict_regs = {};
 
   ira_assert (! ALLOCNO_ASSIGNED_P (a));
   get_conflict_and_start_profitable_regs (a, retry_p,
@@ -1833,23 +2001,56 @@ assign_hard_reg (ira_allocno_t a, bool retry_p)
 		  mode = ALLOCNO_MODE (conflict_a);
 		  conflict_nregs = hard_regno_nregs (hard_regno, mode);
-		  if (conflict_nregs == n_objects && conflict_nregs > 1)
+		  auto spill_a = (retry_p
+				  ? nullptr
+				  : ira_soft_conflict (a, conflict_a));
+		  if (spill_a)
 		    {
-		      int num = OBJECT_SUBWORD (conflict_obj);
-
-		      if (REG_WORDS_BIG_ENDIAN)
-			SET_HARD_REG_BIT (conflicting_regs[word],
-					  hard_regno + n_objects - num - 1);
-		      else
-			SET_HARD_REG_BIT (conflicting_regs[word],
-					  hard_regno + num);
+		      if (bitmap_set_bit (allocnos_to_spill,
+					  ALLOCNO_NUM (spill_a)))
+			{
+			  ira_loop_border_costs border_costs (spill_a);
+			  auto cost = border_costs.spill_inside_loop_cost ();
+			  auto note_conflict = [&](int r)
+			    {
+			      SET_HARD_REG_BIT (soft_conflict_regs, r);
+			      auto hri = ira_class_hard_reg_index[aclass][r];
+			      if (hri >= 0)
+				{
+				  costs[hri] += cost;
+				  full_costs[hri] += cost;
+				}
+			    };
+			  for (int r = hard_regno;
+			       r >= 0 && (int) end_hard_regno (mode, r) > hard_regno;
+			       r--)
+			    note_conflict (r);
+			  for (int r = hard_regno + 1;
+			       r < hard_regno + conflict_nregs;
+			       r++)
+			    note_conflict (r);
+			}
 		    }
 		  else
-		    conflicting_regs[word]
-		      |= ira_reg_mode_hard_regset[hard_regno][mode];
-		  if (hard_reg_set_subset_p (profitable_hard_regs,
-					     conflicting_regs[word]))
-		    goto fail;
+		    {
+		      if (conflict_nregs == n_objects && conflict_nregs > 1)
+			{
+			  int num = OBJECT_SUBWORD (conflict_obj);
+
+			  if (REG_WORDS_BIG_ENDIAN)
+			    SET_HARD_REG_BIT (conflicting_regs[word],
+					      hard_regno + n_objects - num - 1);
+			  else
+			    SET_HARD_REG_BIT (conflicting_regs[word],
+					      hard_regno + num);
+			}
+		      else
+			conflicting_regs[word]
+			  |= ira_reg_mode_hard_regset[hard_regno][mode];
+		      if (hard_reg_set_subset_p (profitable_hard_regs,
+						 conflicting_regs[word]))
+			goto fail;
+		    }
 		}
 	    }
 	  else if (! retry_p
@@ -1962,6 +2163,8 @@ assign_hard_reg (ira_allocno_t a, bool retry_p)
     {
       for (i = hard_regno_nregs (best_hard_regno, mode) - 1; i >= 0; i--)
 	allocated_hardreg_p[best_hard_regno + i] = true;
+      spill_soft_conflicts (a, allocnos_to_spill, soft_conflict_regs,
+			    best_hard_regno);
     }
   if (! retry_p)
     restore_costs_from_copies (a);
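
[Reviewer's note (illustration only, not part of the patch): the key change
to assign_hard_reg above is that a soft conflict no longer excludes the
register outright; it merely charges it the cost of spilling SPILL_A inside
its loop.  A hypothetical simplification of the two paths:

    if (spill_a)
      costs[hregno] += border_costs.spill_inside_loop_cost ();  /* penalty */
    else
      SET_HARD_REG_BIT (conflicting_regs[word], hregno);  /* hard exclusion */

The register can therefore still win if the subloop spill is cheaper than
every alternative register.]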
@@ -2983,6 +3186,8 @@ improve_allocation (void)
 	   assigning hard register to allocno A even without spilling
 	   conflicting allocnos.  */
 	continue;
+      auto_bitmap allocnos_to_spill;
+      HARD_REG_SET soft_conflict_regs = {};
       mode = ALLOCNO_MODE (a);
       nwords = ALLOCNO_NUM_OBJECTS (a);
       /* Process each allocno conflicting with A and update the cost
@@ -3008,31 +3213,49 @@ improve_allocation (void)
 	    ALLOCNO_COLOR_DATA (conflict_a)->temp = check;
 	  if ((conflict_hregno = ALLOCNO_HARD_REGNO (conflict_a)) < 0)
 	    continue;
-	  spill_cost = ALLOCNO_UPDATED_MEMORY_COST (conflict_a);
-	  k = (ira_class_hard_reg_index
-	       [ALLOCNO_CLASS (conflict_a)][conflict_hregno]);
-	  ira_assert (k >= 0);
-	  if ((allocno_costs = ALLOCNO_HARD_REG_COSTS (conflict_a))
-	      != NULL)
-	    spill_cost -= allocno_costs[k];
+	  auto spill_a = ira_soft_conflict (a, conflict_a);
+	  if (spill_a)
+	    {
+	      if (!bitmap_set_bit (allocnos_to_spill,
+				   ALLOCNO_NUM (spill_a)))
+		continue;
+	      ira_loop_border_costs border_costs (spill_a);
+	      spill_cost = border_costs.spill_inside_loop_cost ();
+	    }
 	  else
-	    spill_cost -= ALLOCNO_UPDATED_CLASS_COST (conflict_a);
-	  spill_cost
-	    += allocno_copy_cost_saving (conflict_a, conflict_hregno);
+	    {
+	      spill_cost = ALLOCNO_UPDATED_MEMORY_COST (conflict_a);
+	      k = (ira_class_hard_reg_index
+		   [ALLOCNO_CLASS (conflict_a)][conflict_hregno]);
+	      ira_assert (k >= 0);
+	      if ((allocno_costs = ALLOCNO_HARD_REG_COSTS (conflict_a))
+		  != NULL)
+		spill_cost -= allocno_costs[k];
+	      else
+		spill_cost -= ALLOCNO_UPDATED_CLASS_COST (conflict_a);
+	      spill_cost
+		+= allocno_copy_cost_saving (conflict_a, conflict_hregno);
+	    }
 	  conflict_nregs = hard_regno_nregs (conflict_hregno,
 					     ALLOCNO_MODE (conflict_a));
+	  auto note_conflict = [&](int r)
+	    {
+	      if (check_hard_reg_p (a, r,
+				    conflicting_regs, profitable_hard_regs))
+		{
+		  if (spill_a)
+		    SET_HARD_REG_BIT (soft_conflict_regs, r);
+		  costs[r] += spill_cost;
+		}
+	    };
 	  for (r = conflict_hregno;
 	       r >= 0 && (int) end_hard_regno (mode, r) > conflict_hregno;
 	       r--)
-	    if (check_hard_reg_p (a, r,
-				  conflicting_regs, profitable_hard_regs))
-	      costs[r] += spill_cost;
+	    note_conflict (r);
 	  for (r = conflict_hregno + 1;
 	       r < conflict_hregno + conflict_nregs;
 	       r++)
-	    if (check_hard_reg_p (a, r,
-				  conflicting_regs, profitable_hard_regs))
-	      costs[r] += spill_cost;
+	    note_conflict (r);
 	}
     }
   min_cost = INT_MAX;
@@ -3055,6 +3278,7 @@ improve_allocation (void)
 	   by spilling some conflicting allocnos does not improve
 	   the allocation cost.  */
 	continue;
+      spill_soft_conflicts (a, allocnos_to_spill, soft_conflict_regs, best);
       nregs = hard_regno_nregs (best, mode);
       /* Now spill conflicting allocnos which contain a hard register
	 of A when we assign the best chosen hard register to it.  */
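
[Reviewer's note (illustration only, not part of the patch): both call sites
above rely on bitmap_set_bit returning true only when the bit was previously
clear, so each candidate SPILL_A is costed exactly once even if it is
reached through several conflict objects.  The idiom, in sketch form:

    if (bitmap_set_bit (allocnos_to_spill, ALLOCNO_NUM (spill_a)))
      /* First encounter: charge the in-loop spill cost once.  */
      spill_cost = border_costs.spill_inside_loop_cost ();

improve_allocation uses the negated form (!bitmap_set_bit) to `continue'
past allocnos that have already been costed.]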
diff --git a/gcc/ira-int.h b/gcc/ira-int.h
index a78811eb416b..e1e680252114 100644
--- a/gcc/ira-int.h
+++ b/gcc/ira-int.h
@@ -1067,6 +1067,7 @@ extern void ira_debug_conflicts (bool);
 extern void ira_build_conflicts (void);
 
 /* ira-color.c */
+extern ira_allocno_t ira_soft_conflict (ira_allocno_t, ira_allocno_t);
 extern void ira_debug_hard_regs_forest (void);
 extern int ira_loop_edge_freq (ira_loop_tree_node_t, int, bool);
 extern void ira_reassign_conflict_allocnos (int);
diff --git a/gcc/ira.c b/gcc/ira.c
index 6f65d20cfd98..3bece668692b 100644
--- a/gcc/ira.c
+++ b/gcc/ira.c
@@ -2649,6 +2649,8 @@ check_allocation (void)
 	      int conflict_hard_regno = ALLOCNO_HARD_REGNO (conflict_a);
 	      if (conflict_hard_regno < 0)
 		continue;
+	      if (ira_soft_conflict (a, conflict_a))
+		continue;
 
 	      conflict_nregs = hard_regno_nregs (conflict_hard_regno,
 						 ALLOCNO_MODE (conflict_a));
diff --git a/gcc/testsuite/gcc.target/aarch64/reg-alloc-4.c b/gcc/testsuite/gcc.target/aarch64/reg-alloc-4.c
new file mode 100644
index 000000000000..ceb6f50de2dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/reg-alloc-4.c
@@ -0,0 +1,69 @@
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#define PROB 0.1
+
+struct L
+{
+  int data;
+  volatile struct L *next;
+  volatile struct L *inner;
+};
+
+/* The thing we're testing here is that the !head->inner path of the outer loop
+   body has no stack accesses.  It's possible that we'll need to update this
+   pattern for unrelated code changes, but the test should be XFAILed rather
+   than changed if any new stack accesses occur on the !head->inner path.  */
+/*
+** foo:
+** ...
+** ldr	(w[0-9]+), \[(x[0-9]+)\]
+** add	(w[0-9]+), (?:\3, \1|\1, \3)
+** ldr	(x[0-9]+), \[\2, #?16\]
+** str	\3, \[\2\]
+** ldr	\2, \[\2, #?8\]
+** cbn?z	\4, .*
+** ...
+** ret
+*/
+void
+foo (volatile struct L *head, int inc)
+{
+  while (head)
+    {
+      /* Clobber all call-preserved GPRs, so that the loop has to use
+	 call-clobbered GPRs if it is to avoid spilling.  */
+      asm volatile ("" :::
+		    "x19", "x20", "x21", "x22", "x23",
+		    "x24", "x25", "x26", "x27", "x28");
+      inc = head->data + inc;
+      volatile struct L *inner = head->inner;
+      head->data = inc;
+      head = head->next;
+      if (__builtin_expect_with_probability (inner != 0, 0, PROB))
+	for (int i = 0; i < 1000; ++i)
+	  asm volatile ("" :: /* example allocation: */
+			"r" (i),		/* x0 */
+			"r" (inner),		/* x1 */
+			"r" (inner->next),	/* x2 */
+			"r" (inner->next),	/* x3 */
+			"r" (inner->next),	/* x4 */
+			"r" (inner->next),	/* x5 */
+			"r" (inner->next),	/* x6 */
+			"r" (inner->next),	/* x7 */
+			"r" (inner->next),	/* x8 */
+			"r" (inner->next),	/* x9 */
+			"r" (inner->next),	/* x10 */
+			"r" (inner->next),	/* x11 */
+			"r" (inner->next),	/* x12 */
+			"r" (inner->next),	/* x13 */
+			"r" (inner->next),	/* x14 */
+			"r" (inner->next),	/* x15 */
+			"r" (inner->next),	/* x16 */
+			"r" (inner->next),	/* x17 */
+			"r" (inner->next),	/* x18 */
+			"r" (inner->next) :	/* x30 */
+			"x19", "x20", "x21", "x22", "x23",
+			"x24", "x25", "x26", "x27", "x28");
+    }
+}
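
[Reviewer's note (illustration only, not part of the patch):
check-function-bodies matches the compiled body of foo against the quoted
regexps, with "..." standing for any run of instructions, so the test fails
if the !head->inner path gains a stack access.  A spill of the kind the
patch avoids would appear as something like:

    str	w3, [sp, 16]	// save a loop value around the inner loop
    ...
    ldr	w3, [sp, 16]	// reload it on the hot path

at which point the expected ldr/add/ldr/str/ldr/cbn?z sequence would no
longer match.]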