Skip to content

Commit 650c36e

Browse files
committed
PR rtl-optimization/109476: Use ZERO_EXTEND instead of zeroing a SUBREG.
This patch fixes PR rtl-optimization/109476, which is a code quality regression affecting AVR. The cause is that the lower-subreg pass is sometimes overly aggressive, lowering the LSHIFTRT below: (insn 7 4 8 2 (set (reg:HI 51) (lshiftrt:HI (reg/v:HI 49 [ b ]) (const_int 8 [0x8]))) "t.ii":4:36 557 {lshrhi3} (nil)) into a pair of QImode SUBREG assignments: (insn 19 4 20 2 (set (subreg:QI (reg:HI 51) 0) (reg:QI 54 [ b+1 ])) "t.ii":4:36 86 {movqi_insn_split} (nil)) (insn 20 19 8 2 (set (subreg:QI (reg:HI 51) 1) (const_int 0 [0])) "t.ii":4:36 86 {movqi_insn_split} (nil)) but this idiom, SETs of SUBREGs, interferes with combine's ability to associate/fuse instructions. The solution, on targets that have a suitable ZERO_EXTEND (i.e. where the lower-subreg pass wouldn't itself split a ZERO_EXTEND, so "splitting_zext" is false), is to split/lower LSHIFTRT to a ZERO_EXTEND. To answer Richard's question in comment #10 of the bugzilla PR, the function resolve_shift_zext is called with one of four RTX codes, ASHIFTRT, LSHIFTRT, ZERO_EXTEND and ASHIFT, but only with LSHIFTRT can the setting of low_part and high_part SUBREGs be replaced by a ZERO_EXTEND. For ASHIFTRT, we require a sign extension, so don't set the high_part to zero; if we're splitting a ZERO_EXTEND then it doesn't make sense to replace it with a ZERO_EXTEND, and for ASHIFT we've played games to swap the high_part and low_part SUBREGs, so that we assign the low_part to zero (for double word shifts by greater than word size bits). 2023-04-28 Roger Sayle <[email protected]> gcc/ChangeLog PR rtl-optimization/109476 * lower-subreg.cc: Include explow.h for force_reg. (resolve_shift_zext): Take an additional SPEED_P argument. If decomposing a suitable LSHIFTRT and we're not splitting ZERO_EXTEND (based on the current SPEED_P), then use a ZERO_EXTEND instead of setting a high part SUBREG to zero, which helps combine. (decompose_multiword_subregs): Update call to resolve_shift_zext. 
gcc/testsuite/ChangeLog PR rtl-optimization/109476 * gcc.target/avr/mmcu/pr109476.c: New test case.
1 parent fde0058 commit 650c36e

File tree

2 files changed

+39
-11
lines changed

2 files changed

+39
-11
lines changed

gcc/lower-subreg.cc

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ along with GCC; see the file COPYING3. If not see
3737
#include "cfgbuild.h"
3838
#include "dce.h"
3939
#include "expr.h"
40+
#include "explow.h"
4041
#include "tree-pass.h"
4142
#include "lower-subreg.h"
4243
#include "rtl-iter.h"
@@ -1299,11 +1300,12 @@ find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
12991300

13001301
/* Decompose a more than word wide shift (in INSN) of a multiword
13011302
pseudo or a multiword zero-extend of a wordmode pseudo into a move
1302-
and 'set to zero' insn. Return a pointer to the new insn when a
1303-
replacement was done. */
1303+
and 'set to zero' insn. SPEED_P says whether we are optimizing
1304+
for speed or size, when checking if a ZERO_EXTEND is preferable.
1305+
Return a pointer to the new insn when a replacement was done. */
13041306

13051307
static rtx_insn *
1306-
resolve_shift_zext (rtx_insn *insn)
1308+
resolve_shift_zext (rtx_insn *insn, bool speed_p)
13071309
{
13081310
rtx set;
13091311
rtx op;
@@ -1378,14 +1380,29 @@ resolve_shift_zext (rtx_insn *insn)
13781380
dest_reg, GET_CODE (op) != ASHIFTRT);
13791381
}
13801382

1381-
if (dest_reg != src_reg)
1382-
emit_move_insn (dest_reg, src_reg);
1383-
if (GET_CODE (op) != ASHIFTRT)
1384-
emit_move_insn (dest_upper, CONST0_RTX (word_mode));
1385-
else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
1386-
emit_move_insn (dest_upper, copy_rtx (src_reg));
1383+
/* Consider using ZERO_EXTEND instead of setting DEST_UPPER to zero
1384+
if this is considered reasonable. */
1385+
if (GET_CODE (op) == LSHIFTRT
1386+
&& GET_MODE (op) == twice_word_mode
1387+
&& REG_P (SET_DEST (set))
1388+
&& !choices[speed_p].splitting_zext)
1389+
{
1390+
rtx tmp = force_reg (word_mode, copy_rtx (src_reg));
1391+
tmp = simplify_gen_unary (ZERO_EXTEND, twice_word_mode, tmp, word_mode);
1392+
emit_move_insn (SET_DEST (set), tmp);
1393+
}
13871394
else
1388-
emit_move_insn (dest_upper, upper_src);
1395+
{
1396+
if (dest_reg != src_reg)
1397+
emit_move_insn (dest_reg, src_reg);
1398+
if (GET_CODE (op) != ASHIFTRT)
1399+
emit_move_insn (dest_upper, CONST0_RTX (word_mode));
1400+
else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
1401+
emit_move_insn (dest_upper, copy_rtx (src_reg));
1402+
else
1403+
emit_move_insn (dest_upper, upper_src);
1404+
}
1405+
13891406
insns = get_insns ();
13901407

13911408
end_sequence ();
@@ -1670,7 +1687,7 @@ decompose_multiword_subregs (bool decompose_copies)
16701687
{
16711688
rtx_insn *decomposed_shift;
16721689

1673-
decomposed_shift = resolve_shift_zext (insn);
1690+
decomposed_shift = resolve_shift_zext (insn, speed_p);
16741691
if (decomposed_shift != NULL_RTX)
16751692
{
16761693
insn = decomposed_shift;
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
/* { dg-do compile } */
2+
/* { dg-options "-Os -mmcu=avrxmega3" } */
3+
4+
unsigned short foo(unsigned char a, unsigned short b) {
5+
return (unsigned char)((b >> 8) + 0) * a ;
6+
}
7+
8+
/* { dg-final { scan-assembler-times "mul" 1 } } */
9+
/* { dg-final { scan-assembler-times "mov" 1 } } */
10+
/* { dg-final { scan-assembler-not "add" } } */
11+
/* { dg-final { scan-assembler-not "ldi" } } */

0 commit comments

Comments
 (0)