Skip to content

Commit b3c6de9

Browse files
pmurlaboger
authored and committed
cmd/internal/obj/ppc64: allow VR register arguments to VS registers
Likewise, reorder register numbers such that extended mnemonics which use FPR arguments can be transparently encoded as a VSR argument for the move to/from VSR class of instructions. Specifically, ensure the following holds for all FPRx and VRx constants: FPRx & 63 == x, and VRx & 63 == x + 32.

This simplifies encoding machine instructions, and likewise helps ppc64 assembly writers to avoid hokey workarounds when switching from vector to vector-scalar register notation. Notably, many VSX instructions are limited to vector operands due to encoding restrictions.

Secondly, this explicitly rejects dubious usages of the m[tf]vsr family of instructions which had previously been accepted.

* Reject two GPR arguments for non-MTVSRDD opcodes. These have no defined behavior today, and may set RFU bits. e.g.:

    MTVSRD R1, R2, VS1

* Reject FPR destinations for MTVSRDD, and only accept with two GPR arguments. This copies two GPR values into either half of a VSR. e.g.:

    MTVSRDD R1, R2, F1
    MTVSRDD R1, F1

Change-Id: If13dd88c3791d1892dbd18ef0e34675a5285fff9
Reviewed-on: https://go-review.googlesource.com/c/go/+/342929
Run-TryBot: Paul Murphy <[email protected]>
TryBot-Result: Go Bot <[email protected]>
Trust: Lynn Boger <[email protected]>
Reviewed-by: Cherry Mui <[email protected]>
1 parent ee91bb8 commit b3c6de9

File tree

4 files changed

+87
-64
lines changed

4 files changed

+87
-64
lines changed

src/cmd/asm/internal/asm/testdata/ppc64.s

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -649,6 +649,8 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
649649
LXVB16X (R3)(R4), VS1 // 7c241ed8
650650
LXVW4X (R3)(R4), VS1 // 7c241e18
651651
LXV 16(R3), VS1 // f4230011
652+
LXV 16(R3), VS33 // f4230019
653+
LXV 16(R3), V1 // f4230019
652654
LXVL R3, R4, VS1 // 7c23221a
653655
LXVLL R3, R4, VS1 // 7c23225a
654656
LXVX R3, R4, VS1 // 7c232218
@@ -668,8 +670,13 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
668670
MTFPRD R3, F0 // 7c030166
669671
MFVRD V0, R3 // 7c030067
670672
MFVSRLD VS63,R4 // 7fe40267
673+
MFVSRLD V31,R4 // 7fe40267
671674
MFVSRWZ VS33,R4 // 7c2400e7
675+
MFVSRWZ V1,R4 // 7c2400e7
672676
MTVSRD R3, VS1 // 7c230166
677+
MTVSRDD R3, R4, VS1 // 7c232366
678+
MTVSRDD R3, R4, VS33 // 7c232367
679+
MTVSRDD R3, R4, V1 // 7c232367
673680
MTVRD R3, V13 // 7da30167
674681
MTVSRWA R4, VS31 // 7fe401a6
675682
MTVSRWS R4, VS32 // 7c040327
@@ -678,6 +685,8 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
678685
XXBRW VS1, VS2 // f04f0f6c
679686
XXBRH VS2, VS3 // f067176c
680687
XXLAND VS1, VS2, VS3 // f0611410
688+
XXLAND V1, V2, V3 // f0611417
689+
XXLAND VS33, VS34, VS35 // f0611417
681690
XXLANDC VS1, VS2, VS3 // f0611450
682691
XXLEQV VS0, VS1, VS2 // f0400dd0
683692
XXLNAND VS0, VS1, VS2 // f0400d90
@@ -687,11 +696,17 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
687696
XXLORQ VS1, VS2, VS3 // f0611490
688697
XXLXOR VS1, VS2, VS3 // f06114d0
689698
XXSEL VS1, VS2, VS3, VS4 // f08110f0
699+
XXSEL VS33, VS34, VS35, VS36 // f08110ff
700+
XXSEL V1, V2, V3, V4 // f08110ff
690701
XXMRGHW VS1, VS2, VS3 // f0611090
691702
XXMRGLW VS1, VS2, VS3 // f0611190
692703
XXSPLTW VS1, $1, VS2 // f0410a90
704+
XXSPLTW VS33, $1, VS34 // f0410a93
705+
XXSPLTW V1, $1, V2 // f0410a93
693706
XXPERM VS1, VS2, VS3 // f06110d0
694707
XXSLDWI VS1, VS2, $1, VS3 // f0611110
708+
XXSLDWI V1, V2, $1, V3 // f0611117
709+
XXSLDWI VS33, VS34, $1, VS35 // f0611117
695710
XSCVDPSP VS1, VS2 // f0400c24
696711
XVCVDPSP VS1, VS2 // f0400e24
697712
XSCVSXDDP VS1, VS2 // f0400de0

src/cmd/internal/obj/ppc64/a.out.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,10 @@ const (
7979
REG_R30
8080
REG_R31
8181

82-
/* F0=4128 ... F31=4159 */
83-
REG_F0
82+
/* Align FPR and VSR vectors such that when masked with 0x3F they produce
83+
an equivalent VSX register. */
84+
/* F0=4160 ... F31=4191 */
85+
REG_F0 = obj.RBasePPC64 + iota + 32
8486
REG_F1
8587
REG_F2
8688
REG_F3
@@ -113,7 +115,7 @@ const (
113115
REG_F30
114116
REG_F31
115117

116-
/* V0=4160 ... V31=4191 */
118+
/* V0=4192 ... V31=4223 */
117119
REG_V0
118120
REG_V1
119121
REG_V2
@@ -147,7 +149,7 @@ const (
147149
REG_V30
148150
REG_V31
149151

150-
/* VS0=4192 ... VS63=4255 */
152+
/* VS0=4224 ... VS63=4287 */
151153
REG_VS0
152154
REG_VS1
153155
REG_VS2

src/cmd/internal/obj/ppc64/asm9.go

Lines changed: 25 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -428,15 +428,13 @@ var optab = []Optab{
428428
{as: ASTXSIWX, a1: C_VSREG, a6: C_SOREG, type_: 86, size: 4}, /* vsx scalar as integer store, xx1-form */
429429

430430
/* VSX move from VSR */
431-
{as: AMFVSRD, a1: C_VSREG, a6: C_REG, type_: 88, size: 4}, /* vsx move from vsr, xx1-form */
431+
{as: AMFVSRD, a1: C_VSREG, a6: C_REG, type_: 88, size: 4},
432432
{as: AMFVSRD, a1: C_FREG, a6: C_REG, type_: 88, size: 4},
433-
{as: AMFVSRD, a1: C_VREG, a6: C_REG, type_: 88, size: 4},
434433

435434
/* VSX move to VSR */
436-
{as: AMTVSRD, a1: C_REG, a6: C_VSREG, type_: 88, size: 4}, /* vsx move to vsr, xx1-form */
437-
{as: AMTVSRD, a1: C_REG, a2: C_REG, a6: C_VSREG, type_: 88, size: 4},
438-
{as: AMTVSRD, a1: C_REG, a6: C_FREG, type_: 88, size: 4},
439-
{as: AMTVSRD, a1: C_REG, a6: C_VREG, type_: 88, size: 4},
435+
{as: AMTVSRD, a1: C_REG, a6: C_VSREG, type_: 104, size: 4},
436+
{as: AMTVSRD, a1: C_REG, a6: C_FREG, type_: 104, size: 4},
437+
{as: AMTVSRDD, a1: C_REG, a2: C_REG, a6: C_VSREG, type_: 104, size: 4},
440438

441439
/* VSX logical */
442440
{as: AXXLAND, a1: C_VSREG, a2: C_VSREG, a6: C_VSREG, type_: 90, size: 4}, /* vsx and, xx3-form */
@@ -1036,13 +1034,14 @@ func (c *ctxt9) oplook(p *obj.Prog) *Optab {
10361034
// c.ctxt.Logf("oplook %v %d %d %d %d\n", p, a1, a2, a3, a4, a5, a6)
10371035
ops := oprange[p.As&obj.AMask]
10381036
c1 := &xcmp[a1]
1037+
c2 := &xcmp[a2]
10391038
c3 := &xcmp[a3]
10401039
c4 := &xcmp[a4]
10411040
c5 := &xcmp[a5]
10421041
c6 := &xcmp[a6]
10431042
for i := range ops {
10441043
op := &ops[i]
1045-
if int(op.a2) == a2 && c1[op.a1] && c3[op.a3] && c4[op.a4] && c5[op.a5] && c6[op.a6] {
1044+
if c1[op.a1] && c2[op.a2] && c3[op.a3] && c4[op.a4] && c5[op.a5] && c6[op.a6] {
10461045
p.Optab = uint16(cap(optab) - cap(ops) + i + 1)
10471046
return op
10481047
}
@@ -1116,6 +1115,12 @@ func cmp(a int, b int) bool {
11161115
return r0iszero != 0 /*TypeKind(100016)*/
11171116
}
11181117

1118+
case C_VSREG:
1119+
/* Allow any VR argument as a VSR operand. */
1120+
if b == C_VREG {
1121+
return true
1122+
}
1123+
11191124
case C_ANY:
11201125
return true
11211126
}
@@ -1594,7 +1599,6 @@ func buildop(ctxt *obj.Link) {
15941599
opset(AMTVRD, r0)
15951600
opset(AMTVSRWA, r0)
15961601
opset(AMTVSRWZ, r0)
1597-
opset(AMTVSRDD, r0)
15981602
opset(AMTVSRWS, r0)
15991603

16001604
case AXXLAND: /* xxland, xxlandc, xxleqv, xxlnand */
@@ -1977,6 +1981,7 @@ func buildop(ctxt *obj.Link) {
19771981
ACMPEQB,
19781982
AECIWX,
19791983
ACLRLSLWI,
1984+
AMTVSRDD,
19801985
obj.ANOP,
19811986
obj.ATEXT,
19821987
obj.AUNDEF,
@@ -2075,58 +2080,40 @@ func AOP_IR(op uint32, d uint32, simm uint32) uint32 {
20752080
}
20762081

20772082
/* XX1-form 3-register operands, 1 VSR operand */
2078-
func AOP_XX1(op uint32, d uint32, a uint32, b uint32) uint32 {
2079-
/* For the XX-form encodings, we need the VSX register number to be exactly */
2080-
/* between 0-63, so we can properly set the rightmost bits. */
2081-
r := d - REG_VS0
2083+
func AOP_XX1(op uint32, r uint32, a uint32, b uint32) uint32 {
20822084
return op | (r&31)<<21 | (a&31)<<16 | (b&31)<<11 | (r&32)>>5
20832085
}
20842086

20852087
/* XX2-form 3-register operands, 2 VSR operands */
2086-
func AOP_XX2(op uint32, d uint32, a uint32, b uint32) uint32 {
2087-
xt := d - REG_VS0
2088-
xb := b - REG_VS0
2088+
func AOP_XX2(op uint32, xt uint32, a uint32, xb uint32) uint32 {
20892089
return op | (xt&31)<<21 | (a&3)<<16 | (xb&31)<<11 | (xb&32)>>4 | (xt&32)>>5
20902090
}
20912091

20922092
/* XX3-form 3 VSR operands */
2093-
func AOP_XX3(op uint32, d uint32, a uint32, b uint32) uint32 {
2094-
xt := d - REG_VS0
2095-
xa := a - REG_VS0
2096-
xb := b - REG_VS0
2093+
func AOP_XX3(op uint32, xt uint32, xa uint32, xb uint32) uint32 {
20972094
return op | (xt&31)<<21 | (xa&31)<<16 | (xb&31)<<11 | (xa&32)>>3 | (xb&32)>>4 | (xt&32)>>5
20982095
}
20992096

21002097
/* XX3-form 3 VSR operands + immediate */
2101-
func AOP_XX3I(op uint32, d uint32, a uint32, b uint32, c uint32) uint32 {
2102-
xt := d - REG_VS0
2103-
xa := a - REG_VS0
2104-
xb := b - REG_VS0
2098+
func AOP_XX3I(op uint32, xt uint32, xa uint32, xb uint32, c uint32) uint32 {
21052099
return op | (xt&31)<<21 | (xa&31)<<16 | (xb&31)<<11 | (c&3)<<8 | (xa&32)>>3 | (xb&32)>>4 | (xt&32)>>5
21062100
}
21072101

21082102
/* XX4-form, 4 VSR operands */
2109-
func AOP_XX4(op uint32, d uint32, a uint32, b uint32, c uint32) uint32 {
2110-
xt := d - REG_VS0
2111-
xa := a - REG_VS0
2112-
xb := b - REG_VS0
2113-
xc := c - REG_VS0
2103+
func AOP_XX4(op uint32, xt uint32, xa uint32, xb uint32, xc uint32) uint32 {
21142104
return op | (xt&31)<<21 | (xa&31)<<16 | (xb&31)<<11 | (xc&31)<<6 | (xc&32)>>2 | (xa&32)>>3 | (xb&32)>>4 | (xt&32)>>5
21152105
}
21162106

21172107
/* DQ-form, VSR register, register + offset operands */
2118-
func AOP_DQ(op uint32, d uint32, a uint32, b uint32) uint32 {
2119-
/* For the DQ-form encodings, we need the VSX register number to be exactly */
2120-
/* between 0-63, so we can properly set the SX bit. */
2121-
r := d - REG_VS0
2108+
func AOP_DQ(op uint32, xt uint32, a uint32, b uint32) uint32 {
21222109
/* The EA for this instruction form is (RA) + DQ << 4, where DQ is a 12-bit signed integer. */
21232110
/* In order to match the output of the GNU objdump (and make the usage in Go asm easier), the */
21242111
/* instruction is called using the sign extended value (i.e. a valid offset would be -32752 or 32752, */
21252112
/* not -2047 or 2047), so 'b' needs to be adjusted to the expected 12-bit DQ value. Bear in mind that */
21262113
/* bits 0 to 3 in 'dq' need to be zero, otherwise this will generate an illegal instruction. */
21272114
/* If in doubt how this instruction form is encoded, refer to ISA 3.0b, pages 492 and 507. */
21282115
dq := b >> 4
2129-
return op | (r&31)<<21 | (a&31)<<16 | (dq&4095)<<4 | (r&32)>>2
2116+
return op | (xt&31)<<21 | (a&31)<<16 | (dq&4095)<<4 | (xt&32)>>2
21302117
}
21312118

21322119
/* Z23-form, 3-register operands + CY field */
@@ -3586,33 +3573,8 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
35863573
/* 3-register operand order: (RB)(RA*1), XT */
35873574
o1 = AOP_XX1(c.oploadx(p.As), uint32(p.To.Reg), uint32(p.From.Index), uint32(p.From.Reg))
35883575

3589-
case 88: /* VSX instructions, XX1-form */
3590-
/* reg reg none OR reg reg reg */
3591-
/* 3-register operand order: RA, RB, XT */
3592-
/* 2-register operand order: XS, RA or RA, XT */
3593-
xt := int32(p.To.Reg)
3594-
xs := int32(p.From.Reg)
3595-
/* We need to treat the special case of extended mnemonics that may have a FREG/VREG as an argument */
3596-
if REG_V0 <= xt && xt <= REG_V31 {
3597-
/* Convert V0-V31 to VS32-VS63 */
3598-
xt = xt + 64
3599-
o1 = AOP_XX1(c.oprrr(p.As), uint32(xt), uint32(p.From.Reg), uint32(p.Reg))
3600-
} else if REG_F0 <= xt && xt <= REG_F31 {
3601-
/* Convert F0-F31 to VS0-VS31 */
3602-
xt = xt + 64
3603-
o1 = AOP_XX1(c.oprrr(p.As), uint32(xt), uint32(p.From.Reg), uint32(p.Reg))
3604-
} else if REG_VS0 <= xt && xt <= REG_VS63 {
3605-
o1 = AOP_XX1(c.oprrr(p.As), uint32(xt), uint32(p.From.Reg), uint32(p.Reg))
3606-
} else if REG_V0 <= xs && xs <= REG_V31 {
3607-
/* Likewise for XS */
3608-
xs = xs + 64
3609-
o1 = AOP_XX1(c.oprrr(p.As), uint32(xs), uint32(p.To.Reg), uint32(p.Reg))
3610-
} else if REG_F0 <= xs && xs <= REG_F31 {
3611-
xs = xs + 64
3612-
o1 = AOP_XX1(c.oprrr(p.As), uint32(xs), uint32(p.To.Reg), uint32(p.Reg))
3613-
} else if REG_VS0 <= xs && xs <= REG_VS63 {
3614-
o1 = AOP_XX1(c.oprrr(p.As), uint32(xs), uint32(p.To.Reg), uint32(p.Reg))
3615-
}
3576+
case 88: /* VSX mfvsr* instructions, XX1-form XS,RA */
3577+
o1 = AOP_XX1(c.oprrr(p.As), uint32(p.From.Reg), uint32(p.To.Reg), uint32(p.Reg))
36163578

36173579
case 89: /* VSX instructions, XX2-form */
36183580
/* reg none reg OR reg imm reg */
@@ -3743,6 +3705,9 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
37433705
mb := uint32(c.regoff(&p.RestArgs[0].Addr))
37443706
me := uint32(c.regoff(&p.RestArgs[1].Addr))
37453707
o1 = OP_RLW(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.Reg), uint32(p.From.Reg), mb, me)
3708+
3709+
case 104: /* VSX mtvsr* instructions, XX1-form RA,RB,XT */
3710+
o1 = AOP_XX1(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg))
37463711
}
37473712

37483713
out[0] = o1

src/cmd/internal/obj/ppc64/asm_test.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,3 +107,44 @@ func TestPCalign(t *testing.T) {
107107
t.Errorf("Invalid alignment not detected for PCALIGN\n")
108108
}
109109
}
110+
111+
// Verify register constants are correctly aligned. Much of the ppc64 assembler assumes masking out significant
112+
// bits will produce a valid register number:
113+
// REG_Rx & 31 == x
114+
// REG_Fx & 31 == x
115+
// REG_Vx & 31 == x
116+
// REG_VSx & 63 == x
117+
// REG_SPRx & 1023 == x
118+
// REG_CRx & 7 == x
119+
//
120+
// VR and FPR disjointly overlap VSR; interpreting them as VSR registers should produce the correctly overlapped VSR.
121+
// REG_Fx & 63 == x
122+
// REG_Vx & 63 == x + 32
123+
func TestRegValueAlignment(t *testing.T) {
124+
tstFunc := func(rstart, rend, msk, rout int) {
125+
for i := rstart; i <= rend; i++ {
126+
if i&msk != rout {
127+
t.Errorf("%v is not aligned to 0x%X (expected %d, got %d)\n", rconv(i), msk, rout, rstart&msk)
128+
}
129+
rout++
130+
}
131+
}
132+
var testType = []struct {
133+
rstart int
134+
rend int
135+
msk int
136+
rout int
137+
}{
138+
{REG_VS0, REG_VS63, 63, 0},
139+
{REG_R0, REG_R31, 31, 0},
140+
{REG_F0, REG_F31, 31, 0},
141+
{REG_V0, REG_V31, 31, 0},
142+
{REG_V0, REG_V31, 63, 32},
143+
{REG_F0, REG_F31, 63, 0},
144+
{REG_SPR0, REG_SPR0 + 1023, 1023, 0},
145+
{REG_CR0, REG_CR7, 7, 0},
146+
}
147+
for _, t := range testType {
148+
tstFunc(t.rstart, t.rend, t.msk, t.rout)
149+
}
150+
}

0 commit comments

Comments
 (0)