@@ -65,6 +65,11 @@ const (
65
65
PFX_R_PCREL = 1 // Offset is relative to PC, RA should be 0
66
66
)
67
67
68
+ const (
69
+ // NOP is the encoding of the preferred hardware nop instruction, "ori 0,0,0". It is used for padding and for placeholder instruction slots.
70
+ NOP = 0x60000000
71
+ )
72
+
68
73
type Optab struct {
69
74
as obj.As // Opcode
70
75
a1 uint8 // p.From argument (obj.Addr). p is of type obj.Prog.
@@ -831,7 +836,6 @@ func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
831
836
// lay out the code, emitting code and data relocations.
832
837
833
838
bp := c .cursym .P
834
- nop := LOP_IRR (OP_ORI , REGZERO , REGZERO , 0 )
835
839
var i int32
836
840
for p := c .cursym .Func ().Text .Link ; p != nil ; p = p .Link {
837
841
c .pc = p .Pc
@@ -846,13 +850,13 @@ func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
846
850
if v > 0 {
847
851
// Same padding instruction for all
848
852
for i = 0 ; i < int32 (v / 4 ); i ++ {
849
- c .ctxt .Arch .ByteOrder .PutUint32 (bp , nop )
853
+ c .ctxt .Arch .ByteOrder .PutUint32 (bp , NOP )
850
854
bp = bp [4 :]
851
855
}
852
856
}
853
857
} else {
854
858
if p .Mark & PFX_X64B != 0 {
855
- c .ctxt .Arch .ByteOrder .PutUint32 (bp , nop )
859
+ c .ctxt .Arch .ByteOrder .PutUint32 (bp , NOP )
856
860
bp = bp [4 :]
857
861
}
858
862
o .asmout (& c , p , o , & out )
@@ -2531,6 +2535,18 @@ func decodeMask64(mask int64) (mb, me uint32, valid bool) {
2531
2535
return mb , (me - 1 ) & 63 , valid
2532
2536
}
2533
2537
2538
+ // loadl16 returns the encoding of "ori r,r,uint16(d)", which ORs the lower 16 bits of the constant d into register r, or NOP when those bits are all zero.
2539
+ func loadl16 (r int , d int64 ) uint32 {
2540
+ v := uint16 (d ) // keep only the low 16 bits; any upper bits of d are ignored here
2541
+ if v == 0 {
2542
+ // Avoid generating "ori r,r,0", r != 0. Instead, generate the architecturally preferred nop.
2543
+ // For example, "ori r31,r31,0" is a special execution serializing nop on Power10 called "exser".
2544
+ return NOP
2545
+ }
2546
+ return LOP_IRR (OP_ORI , uint32 (r ), uint32 (r ), uint32 (v )) // ori r,r,v
2547
+ }
2548
+
2549
+ // Load the upper 16 bits of a 32b constant into register r.
2534
2550
func loadu32 (r int , d int64 ) uint32 {
2535
2551
v := int32 (d >> 16 )
2536
2552
if isuint32 (uint64 (d )) {
@@ -2734,7 +2750,7 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) {
2734
2750
rel .Add = int64 (v )
2735
2751
rel .Type = objabi .R_CALLPOWER
2736
2752
}
2737
- o2 = 0x60000000 // nop, sometimes overwritten by ld r2, 24(r1) when dynamic linking
2753
+ o2 = NOP // nop, sometimes overwritten by ld r2, 24(r1) when dynamic linking
2738
2754
2739
2755
case 13 : /* mov[bhwd]{z,} r,r */
2740
2756
// This needs to handle "MOV* $0, Rx". This shows up because $0 also
@@ -2957,14 +2973,14 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) {
2957
2973
} else if o .size == 12 {
2958
2974
// Note, o1 is ADDIS if d is negative, ORIS otherwise.
2959
2975
o1 = loadu32 (REGTMP , d ) // tmp = d & 0xFFFF0000
2960
- o2 = LOP_IRR ( OP_ORI , REGTMP , REGTMP , uint32 ( int32 ( d ))) // tmp |= d & 0xFFFF
2976
+ o2 = loadl16 ( REGTMP , d ) // tmp |= d & 0xFFFF
2961
2977
o3 = AOP_RRR (c .oprrr (p .As ), uint32 (p .To .Reg ), REGTMP , uint32 (r )) // to = from + tmp
2962
2978
} else {
2963
2979
// For backwards compatibility with GOPPC64 < 10, generate 34b constants in register.
2964
- o1 = LOP_IRR (OP_ADDIS , REGZERO , REGTMP , uint32 (d >> 32 )) // tmp = sign_extend((d>>32)&0xFFFF0000)
2965
- o2 = LOP_IRR ( OP_ORI , REGTMP , REGTMP , uint32 (d >> 16 )) // tmp |= (d>>16)&0xFFFF
2966
- o3 = AOP_MD (OP_RLDICR , REGTMP , REGTMP , 16 , 63 - 16 ) // tmp <<= 16
2967
- o4 = LOP_IRR ( OP_ORI , REGTMP , REGTMP , uint32 (uint16 (d ))) // tmp |= d&0xFFFF
2980
+ o1 = LOP_IRR (OP_ADDIS , REGZERO , REGTMP , uint32 (d >> 32 )) // tmp = sign_extend((d>>32)&0xFFFF0000)
2981
+ o2 = loadl16 ( REGTMP , int64 (d >> 16 )) // tmp |= (d>>16)&0xFFFF
2982
+ o3 = AOP_MD (OP_RLDICR , REGTMP , REGTMP , 16 , 63 - 16 ) // tmp <<= 16
2983
+ o4 = loadl16 ( REGTMP , int64 (uint16 (d ))) // tmp |= d&0xFFFF
2968
2984
o5 = AOP_RRR (c .oprrr (p .As ), uint32 (p .To .Reg ), REGTMP , uint32 (r ))
2969
2985
}
2970
2986
@@ -2985,7 +3001,7 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) {
2985
3001
o2 = LOP_RRR (c .oprrr (p .As ), uint32 (p .To .Reg ), REGTMP , uint32 (r ))
2986
3002
} else {
2987
3003
o1 = loadu32 (REGTMP , d )
2988
- o2 = LOP_IRR ( OP_ORI , REGTMP , REGTMP , uint32 ( int32 ( d )) )
3004
+ o2 = loadl16 ( REGTMP , d )
2989
3005
o3 = LOP_RRR (c .oprrr (p .As ), uint32 (p .To .Reg ), REGTMP , uint32 (r ))
2990
3006
}
2991
3007
if p .From .Sym != nil {
@@ -3081,9 +3097,9 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) {
3081
3097
if p .To .Reg == REGTMP || p .From .Reg == REGTMP {
3082
3098
c .ctxt .Diag ("can't synthesize large constant\n %v" , p )
3083
3099
}
3084
- v := c .regoff (p .GetFrom3 ())
3100
+ v := c .vregoff (p .GetFrom3 ())
3085
3101
o1 = AOP_IRR (OP_ADDIS , REGTMP , REGZERO , uint32 (v )>> 16 )
3086
- o2 = LOP_IRR ( OP_ORI , REGTMP , REGTMP , uint32 ( v ) )
3102
+ o2 = loadl16 ( REGTMP , v )
3087
3103
o3 = AOP_RRR (c .oprrr (p .As ), uint32 (p .To .Reg ), uint32 (p .From .Reg ), REGTMP )
3088
3104
if p .From .Sym != nil {
3089
3105
c .ctxt .Diag ("%v is not supported" , p )
0 commit comments