Skip to content

Commit 58f3cbb

Browse files
committed
[xstormy16] Recognize/support swpn (swap nibbles) instruction.
This patch adds support for xstormy16's swap nibbles instruction (swpn). For the test case: short foo(short x) { return (x&0xff00) | ((x<<4)&0xf0) | ((x>>4)&0x0f); } GCC with -O2 currently generates the nine instruction sequence: foo: mov r7,r2 asr r2,#4 and r2,#15 mov.w r6,#-256 and r6,r7 or r2,r6 shl r7,#4 and r7,#255 or r2,r7 ret with this patch, we now generate: foo: swpn r2 ret To achieve this using combine's four instruction "combinations" requires a little wizardry. Firstly, define_insn_and_split are introduced to treat logical shifts followed by bitwise-AND as macro instructions that are split after reload. This is sufficient to recognize a QImode nibble swap, which can be implemented by swpn followed by either a zero-extension or a sign-extension from QImode to HImode. Then finally, in the correct context, a QImode swap-nibbles pattern can be combined to preserve the high-byte of a HImode word, matching the xstormy16's swpn semantics. The naming of the new code iterators is taken from i386.md. 2023-04-29 Roger Sayle <[email protected]> gcc/ChangeLog * config/stormy16/stormy16.md (any_lshift): New code iterator. (any_or_plus): Likewise. (any_rotate): Likewise. (*<any_lshift>_and_internal): New define_insn_and_split to recognize a logical shift followed by an AND, and split it again after reload. (*swpn): New define_insn matching xstormy16's swpn. (*swpn_zext): New define_insn recognizing swpn followed by zero_extendqihi2, i.e. with the high byte set to zero. (*swpn_sext): Likewise, for swpn followed by cbw. (*swpn_sext_2): Likewise, for an alternate RTL form. (*swpn_zext_ior): A pre-reload splitter so that an swpn+zext+ior sequence is split in the correct place to recognize the *swpn_zext followed by any_or_plus (ior, xor or plus) instruction. gcc/testsuite/ChangeLog * gcc.target/xstormy16/swpn-1.c: New QImode test case. * gcc.target/xstormy16/swpn-2.c: New zero_extend test case. * gcc.target/xstormy16/swpn-3.c: New sign_extend test case. 
* gcc.target/xstormy16/swpn-4.c: New HImode test case.
1 parent 83c78cb commit 58f3cbb

File tree

5 files changed

+164
-0
lines changed

5 files changed

+164
-0
lines changed

gcc/config/stormy16/stormy16.md

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@
4848
(CARRY_REG 16)
4949
]
5050
)
51+
52+
;; Code iterators used by the shift+AND and swap-nibbles patterns:
;;   any_lshift  - left shift or logical right shift (ashift, lshiftrt).
;;   any_or_plus - the three operators accepted when joining terms
;;                 (plus, ior, xor).
;;   any_rotate  - rotate left or right (rotate, rotatert).
;; NOTE(review): any_rotate is not referenced by any pattern visible in this
;; diff -- confirm it is used elsewhere or is intended for follow-up work.
(define_code_iterator any_lshift [ashift lshiftrt])
53+
(define_code_iterator any_or_plus [plus ior xor])
54+
(define_code_iterator any_rotate [rotate rotatert])
5155

5256
;; ::::::::::::::::::::
5357
;; ::
@@ -1301,3 +1305,86 @@
13011305
[(parallel [(set (match_dup 2) (match_dup 1))
13021306
(set (match_dup 1) (match_dup 2))])])
13031307

1308+
;; Recognize shl+and and shr+and as macro instructions.
1309+
;; Matches an HImode shift (ashift or lshiftrt, via any_lshift) of operand 1
;; by the constant operand 2, masked with the constant operand 3, together
;; with a clobber of the carry register.  Operand 1 is tied to operand 0 by
;; the "0" constraint.  The insn has no assembler form of its own ("#"); once
;; reload has completed it is split back into the shift (still clobbering
;; carry) followed by a separate AND on operand 0.
;; NOTE(review): operand 1 is declared without a machine mode, unlike the
;; other register operands in these patterns -- confirm whether :HI was
;; intended.
(define_insn_and_split "*<code>_and_internal"
1310+
[(set (match_operand:HI 0 "register_operand" "=r")
1311+
(and:HI (any_lshift:HI (match_operand 1 "register_operand" "0")
1312+
(match_operand 2 "const_int_operand" "i"))
1313+
(match_operand 3 "const_int_operand" "i")))
1314+
(clobber (reg:BI CARRY_REG))]
1315+
;; Only shift counts in the range 0..15 are accepted.
"IN_RANGE (INTVAL (operands[2]), 0, 15)"
1316+
"#"
1317+
;; Split condition: only after reload.
"reload_completed"
1318+
[(parallel [(set (match_dup 0) (any_lshift:HI (match_dup 1) (match_dup 2)))
1319+
(clobber (reg:BI CARRY_REG))])
1320+
(set (match_dup 0) (and:HI (match_dup 0) (match_dup 3)))])
1321+
1322+
;; Swap nibbles instruction
1323+
;; Matches the full HImode form of a nibble swap on operand 1:
;;   ((x << 4) & 240)  op  ((x >> 4) & 15)  op  (x & -256)
;; where op is any one of plus, ior or xor (any_or_plus).  The first two
;; terms exchange the two nibbles of the low byte and the third term keeps
;; the high byte.  Operand 1 is tied to operand 0 ("0" constraint), so the
;; single "swpn %0" operates in place.
(define_insn "*swpn"
1324+
[(set (match_operand:HI 0 "register_operand" "=r")
1325+
(any_or_plus:HI
1326+
(any_or_plus:HI
1327+
(and:HI (ashift:HI (match_operand:HI 1 "register_operand" "0")
1328+
(const_int 4))
1329+
(const_int 240))
1330+
(and:HI (lshiftrt:HI (match_dup 1) (const_int 4))
1331+
(const_int 15)))
1332+
(and:HI (match_dup 1) (const_int -256))))]
1333+
""
1334+
"swpn %0")
1335+
1336+
;; Matches the low-byte nibble swap without a high-byte term:
;;   ((x << 4) & 240)  op  ((x >> 4) & 15)
;; with op being plus, ior or xor (any_or_plus).  Only bits 0..7 of the
;; result can be set, so the output is swpn followed by an AND with #255.
;; Operand 1 is tied to operand 0.  The length attribute is set to 6.
;; NOTE(review): the '|' in the template presumably separates the two
;; assembler instructions on one output line -- confirm against the port's
;; assembler syntax.
(define_insn "*swpn_zext"
1337+
[(set (match_operand:HI 0 "register_operand" "=r")
1338+
(any_or_plus:HI
1339+
(and:HI (ashift:HI (match_operand:HI 1 "register_operand" "0")
1340+
(const_int 4))
1341+
(const_int 240))
1342+
(and:HI (lshiftrt:HI (match_dup 1) (const_int 4))
1343+
(const_int 15))))]
1344+
""
1345+
"swpn %0 | and %0,#255"
1346+
[(set_attr "length" "6")])
1347+
1348+
;; Matches the sign extension to HImode of a QImode rotate-by-4 applied to
;; the low-byte subreg of operand 1 (rotating a byte by four bits exchanges
;; its two nibbles).  Output is swpn followed by cbw on the same register;
;; operand 1 is tied to operand 0.  The length attribute is set to 4.
(define_insn "*swpn_sext"
1349+
[(set (match_operand:HI 0 "register_operand" "=r")
1350+
(sign_extend:HI
1351+
(rotate:QI (subreg:QI (match_operand:HI 1 "register_operand" "0") 0)
1352+
(const_int 4))))]
1353+
""
1354+
"swpn %0 | cbw %0"
1355+
[(set_attr "length" "4")])
1356+
1357+
;; Alternate RTL shape for the sign-extended nibble swap: the sign extension
;; of the low-byte subreg of
;;   (x << 4)  op  (HImode subreg of the QImode value (low byte of x) >> 4)
;; with op being plus, ior or xor (any_or_plus).  It emits the same
;; swpn + cbw sequence and length as *swpn_sext; operand 1 is tied to
;; operand 0.
(define_insn "*swpn_sext_2"
1358+
[(set (match_operand:HI 0 "register_operand" "=r")
1359+
(sign_extend:HI
1360+
(subreg:QI
1361+
(any_or_plus:HI
1362+
(ashift:HI (match_operand:HI 1 "register_operand" "0")
1363+
(const_int 4))
1364+
(subreg:HI (lshiftrt:QI (subreg:QI (match_dup 1) 0)
1365+
(const_int 4)) 0)) 0)))]
1366+
""
1367+
"swpn %0 | cbw %0"
1368+
[(set_attr "length" "4")])
1369+
1370+
;; Recognize swpn_zext+ior as a macro instruction.
1371+
;; Matches the two nibble terms of operand 1 joined with each other and then
;; with operand 2, where every join is the same any_or_plus operator (plus,
;; ior or xor).  It is only available while new pseudos can still be created
;; and has no assembler form ("#"); it is always split ("&& 1") into:
;;   1. a fresh HImode pseudo (operand 3) = ior of the two nibble terms
;;   2. operand 0 = ior of that pseudo with operand 2
;; No constraints are given because the insn never survives to register
;; allocation in this form.
;; NOTE(review): the pattern accepts plus and xor as well as ior, and
;; operand 2 is an arbitrary nonmemory operand, yet the replacement always
;; joins the pseudo and operand 2 with ior.  That is only equivalent when
;; operand 2 shares no set bits with the swapped low byte (e.g. operand 2 of
;; the form x & -256) -- confirm that plus/xor with a general operand 2
;; cannot reach this splitter, or emit the matched operator instead.
(define_insn_and_split "*swpn_zext_ior"
1372+
[(set (match_operand:HI 0 "register_operand")
1373+
(any_or_plus:HI
1374+
(any_or_plus:HI
1375+
(and:HI (ashift:HI (match_operand:HI 1 "register_operand")
1376+
(const_int 4))
1377+
(const_int 240))
1378+
(and:HI (lshiftrt:HI (match_dup 1) (const_int 4))
1379+
(const_int 15)))
1380+
(match_operand:HI 2 "nonmemory_operand")))]
1381+
"can_create_pseudo_p ()"
1382+
"#"
1383+
"&& 1"
1384+
[(set (match_dup 3) (ior:HI (and:HI (ashift:HI (match_dup 1) (const_int 4))
1385+
(const_int 240))
1386+
(and:HI (lshiftrt:HI (match_dup 1) (const_int 4))
1387+
(const_int 15))))
1388+
(set (match_dup 0) (ior:HI (match_dup 3) (match_dup 2)))]
1389+
;; Allocate the temporary that receives the swapped low byte.
"operands[3] = gen_reg_rtx (HImode);")
1390+
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
/* Compile-only test at -O2: an unsigned char nibble swap written with |, ^
   and +, in both operand orders.  The final directive expects the string
   "swpn r2" exactly six times in the assembly, once per function.  */
/* { dg-do compile } */
2+
/* { dg-options "-O2" } */
3+
unsigned char ior_1(unsigned char x) { return (x>>4) | (x<<4); }
4+
unsigned char ior_2(unsigned char x) { return (x<<4) | (x>>4); }
5+
unsigned char xor_1(unsigned char x) { return (x>>4) ^ (x<<4); }
6+
unsigned char xor_2(unsigned char x) { return (x<<4) ^ (x>>4); }
7+
unsigned char sum_1(unsigned char x) { return (x>>4) + (x<<4); }
8+
unsigned char sum_2(unsigned char x) { return (x<<4) + (x>>4); }
9+
/* { dg-final { scan-assembler-times "swpn r2" 6 } } */
10+
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
/* Compile-only test at -O2: the low-byte nibble swap of an unsigned short
   with the result confined to bits 0..7, written with |, ^ and + in both
   operand orders.  The final directives expect "swpn r2" and "and r2,#255"
   six times each, i.e. one swpn plus one masking AND per function.  */
/* { dg-do compile } */
2+
/* { dg-options "-O2" } */
3+
4+
unsigned short ior_1(unsigned short x) { return ((x&0xf0)>>4) | ((x&0x0f)<<4); }
5+
unsigned short xor_1(unsigned short x) { return ((x&0xf0)>>4) ^ ((x&0x0f)<<4); }
6+
unsigned short sum_1(unsigned short x) { return ((x&0xf0)>>4) + ((x&0x0f)<<4); }
7+
8+
unsigned short ior_2(unsigned short x) { return ((x&0x0f)<<4) | ((x&0xf0)>>4); }
9+
unsigned short xor_2(unsigned short x) { return ((x&0x0f)<<4) ^ ((x&0xf0)>>4); }
10+
unsigned short sum_2(unsigned short x) { return ((x&0x0f)<<4) + ((x&0xf0)>>4); }
11+
12+
/* { dg-final { scan-assembler-times "swpn r2" 6 } } */
13+
/* { dg-final { scan-assembler-times "and r2,#255" 6 } } */
14+
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/* Compile-only test at -O2: the low-byte nibble swap of an unsigned short,
   cast to signed char and returned as short, written with |, ^ and + in both
   operand orders.  The final directive expects "cbw" six times.
   NOTE(review): only cbw is counted here; the swpn instruction itself is not
   scanned for in this file -- confirm that is intentional.  */
/* { dg-do compile } */
2+
/* { dg-options "-O2" } */
3+
4+
short ior_1(unsigned short x) {
5+
return (signed char)(((x&0xf0)>>4) | ((x&0x0f)<<4));
6+
}
7+
8+
short xor_1(unsigned short x) {
9+
return (signed char)(((x&0xf0)>>4) ^ ((x&0x0f)<<4));
10+
}
11+
12+
short sum_1(unsigned short x) {
13+
return (signed char)(((x&0xf0)>>4) + ((x&0x0f)<<4));
14+
}
15+
16+
short ior_2(unsigned short x) {
17+
return (signed char)(((x&0x0f)<<4) | ((x&0xf0)>>4));
18+
}
19+
20+
short xor_2(unsigned short x) {
21+
return (signed char)(((x&0x0f)<<4) ^ ((x&0xf0)>>4));
22+
}
23+
24+
short sum_2(unsigned short x) {
25+
return (signed char)(((x&0x0f)<<4) + ((x&0xf0)>>4));
26+
}
27+
28+
/* { dg-final { scan-assembler-times "cbw" 6 } } */
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/* Compile-only test at -O2: the full 16-bit form, where the high byte
   (x & 0xff00) is kept and the two nibbles of the low byte are exchanged.
   The three terms a = (x&0xff00), b = ((x<<4)&0xf0), c = ((x>>4)&0x0f) are
   joined in all six orders (the function-name suffix gives the order) with
   each of |, ^ and +, giving 18 functions.  The final directive expects
   "swpn r2" exactly 18 times.  */
/* { dg-do compile } */
2+
/* { dg-options "-O2" } */
3+
4+
short ior_abc(short x) { return (x&0xff00) | ((x<<4)&0xf0) | ((x>>4)&0x0f); }
5+
short ior_acb(short x) { return (x&0xff00) | ((x>>4)&0x0f) | ((x<<4)&0xf0); }
6+
short ior_bac(short x) { return ((x<<4)&0xf0) | (x&0xff00) | ((x>>4)&0x0f); }
7+
short ior_bca(short x) { return ((x<<4)&0xf0) | ((x>>4)&0x0f) | (x&0xff00); }
8+
short ior_cab(short x) { return ((x>>4)&0x0f) | (x&0xff00) | ((x<<4)&0xf0); }
9+
short ior_cba(short x) { return ((x>>4)&0x0f) | ((x<<4)&0xf0) | (x&0xff00); }
10+
11+
short xor_abc(short x) { return (x&0xff00) ^ ((x<<4)&0xf0) ^ ((x>>4)&0x0f); }
12+
short xor_acb(short x) { return (x&0xff00) ^ ((x>>4)&0x0f) ^ ((x<<4)&0xf0); }
13+
short xor_bac(short x) { return ((x<<4)&0xf0) ^ (x&0xff00) ^ ((x>>4)&0x0f); }
14+
short xor_bca(short x) { return ((x<<4)&0xf0) ^ ((x>>4)&0x0f) ^ (x&0xff00); }
15+
short xor_cab(short x) { return ((x>>4)&0x0f) ^ (x&0xff00) ^ ((x<<4)&0xf0); }
16+
short xor_cba(short x) { return ((x>>4)&0x0f) ^ ((x<<4)&0xf0) ^ (x&0xff00); }
17+
18+
short sum_abc(short x) { return (x&0xff00) + ((x<<4)&0xf0) + ((x>>4)&0x0f); }
19+
short sum_acb(short x) { return (x&0xff00) + ((x>>4)&0x0f) + ((x<<4)&0xf0); }
20+
short sum_bac(short x) { return ((x<<4)&0xf0) + (x&0xff00) + ((x>>4)&0x0f); }
21+
short sum_bca(short x) { return ((x<<4)&0xf0) + ((x>>4)&0x0f) + (x&0xff00); }
22+
short sum_cab(short x) { return ((x>>4)&0x0f) + (x&0xff00) + ((x<<4)&0xf0); }
23+
short sum_cba(short x) { return ((x>>4)&0x0f) + ((x<<4)&0xf0) + (x&0xff00); }
24+
25+
/* { dg-final { scan-assembler-times "swpn r2" 18 } } */

0 commit comments

Comments
 (0)