@@ -1838,14 +1838,210 @@ func fillnop(p []byte, n int) {
 	}
 }
 
+func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
+	s.Grow(int64(c) + int64(pad))
+	fillnop(s.P[c:], int(pad))
+	return c + pad
+}
+
 func spadjop(ctxt *obj.Link, l, q obj.As) obj.As {
 	if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
 		return l
 	}
 	return q
 }
 
+// If the environment variable GOAMD64=alignedjumps is set, the assembler
+// ensures that no standalone or macro-fused jump will straddle or end on a
+// 32 byte boundary, by inserting NOPs before the jumps.
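+//
+// isJump reports whether p is a jump of some sort: any instruction carrying
+// a branch target in Pcond, plus JMP, CALL, RET, DUFFCOPY and DUFFZERO.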
+func isJump(p *obj.Prog) bool {
+	return p.Pcond != nil || p.As == obj.AJMP || p.As == obj.ACALL ||
+		p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO
+}
+
+// lookForJCC returns the first real instruction starting from p, if that
+// instruction is a conditional jump. Otherwise, nil is returned.
+func lookForJCC(p *obj.Prog) *obj.Prog {
+	// Skip any PCDATA, FUNCDATA or NOP instructions.
+	var q *obj.Prog
+	for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link {
+	}
+
+	if q == nil || q.Pcond == nil || p.As == obj.AJMP || p.As == obj.ACALL {
+		return nil
+	}
+
+	switch q.As {
+	case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI,
+		AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT:
+	default:
+		return nil
+	}
+
+	return q
+}
+
+// fusedJump determines whether p can be fused with a subsequent conditional
+// jump instruction. If it can, we return true followed by the total size of
+// the fused jump. If it can't, we return false.
+// Macro fusion rules are derived from the Intel Optimization Manual (April
+// 2019) section 3.4.2.2.
+func fusedJump(p *obj.Prog) (bool, uint8) {
+	var fusedSize uint8
+
+	// The first instruction in a macro-fused pair may be preceded by the
+	// LOCK prefix, or possibly an XACQUIRE/XRELEASE prefix followed by a
+	// LOCK prefix. If it is, we need to be careful to insert any padding
+	// before the locks rather than directly after them.
+
+	if p.As == AXRELEASE || p.As == AXACQUIRE {
+		fusedSize += p.Isize
+		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
+		}
+		if p == nil {
+			return false, 0
+		}
+	}
+	if p.As == ALOCK {
+		fusedSize += p.Isize
+		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
+		}
+		if p == nil {
+			return false, 0
+		}
+	}
+	cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW
+
+	cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ ||
+		p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp
+
+	testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW ||
+		p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW
+
+	incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW ||
+		p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW
+
+	if !cmpAddSub && !testAnd && !incDec {
+		return false, 0
+	}
+
+	if !incDec {
+		var argOne obj.AddrType
+		var argTwo obj.AddrType
+		if cmp {
+			argOne = p.From.Type
+			argTwo = p.To.Type
+		} else {
+			argOne = p.To.Type
+			argTwo = p.From.Type
+		}
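+		// The flag-setting instruction may pair a register with a register,
+		// immediate or memory operand, or a memory operand with a register;
+		// a mem/imm form cannot fuse.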
+		if argOne == obj.TYPE_REG {
+			if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM {
+				return false, 0
+			}
+		} else if argOne == obj.TYPE_MEM {
+			if argTwo != obj.TYPE_REG {
+				return false, 0
+			}
+		} else {
+			return false, 0
+		}
+	}
+
+	fusedSize += p.Isize
+	jmp := lookForJCC(p)
+	if jmp == nil {
+		return false, 0
+	}
+
+	fusedSize += jmp.Isize
+
+	if testAnd {
+		return true, fusedSize
+	}
+
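+	// CMP, ADD and SUB do not fuse with jumps that test only the overflow,
+	// sign or parity flags.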
+	if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI ||
+		jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC {
+		return false, 0
+	}
+
+	if cmpAddSub {
+		return true, fusedSize
+	}
+
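+	// Only the INC/DEC case remains; INC and DEC do not write the carry
+	// flag, so they cannot fuse with jumps that read it.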
+	if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS {
+		return false, 0
+	}
+
+	return true, fusedSize
+}
+
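+// padJumpsCtx is the alignment boundary being padded to, in bytes;
+// 0 means jump padding is disabled.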
+type padJumpsCtx int32
+
+func makePjcCtx(ctxt *obj.Link) padJumpsCtx {
+	// Disable jump padding on 32 bit builds by setting
+	// padJumps to 0.
+	if ctxt.Arch.Family == sys.I386 {
+		return padJumpsCtx(0)
+	}
+
+	// Disable jump padding for hand-written assembly code.
+	if ctxt.IsAsm {
+		return padJumpsCtx(0)
+	}
+
+	if objabi.GOAMD64 != "alignedjumps" {
+		return padJumpsCtx(0)
+	}
+
+	return padJumpsCtx(32)
+}
+
+// padJump detects whether the instruction being assembled is a standalone or a
+// macro-fused jump that needs to be padded. If it is, NOPs are inserted to
+// ensure that the jump does not cross or end on a 32 byte boundary.
+func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 {
+	if pjc == 0 {
+		return c
+	}
+
+	var toPad int32
+	fj, fjSize := fusedJump(p)
+	mask := int32(pjc - 1)
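+	// Example: with pjc == 32, c == 60 and a 6 byte jump, (60&31)+6 == 34,
+	// which reaches the boundary, so toPad becomes 32-(60&31) == 4 and the
+	// jump is pushed to offset 64.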
+	if fj {
+		if (c&mask)+int32(fjSize) >= int32(pjc) {
+			toPad = int32(pjc) - (c & mask)
+		}
+	} else if isJump(p) {
+		if (c&mask)+int32(p.Isize) >= int32(pjc) {
+			toPad = int32(pjc) - (c & mask)
+		}
+	}
+	if toPad <= 0 {
+		return c
+	}
+
+	return noppad(ctxt, s, c, toPad)
+}
+
+// reAssemble is called if an instruction's size changes during assembly. If
+// it does and the instruction is a standalone or a macro-fused jump we need to
+// reassemble.
+func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool {
+	if pjc == 0 {
+		return false
+	}
+
+	fj, _ := fusedJump(p)
+	return fj || isJump(p)
+}
+
+type nopPad struct {
+	p *obj.Prog // Instruction before the pad
+	n int32     // Size of the pad
+}
+
 func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
+	pjc := makePjcCtx(ctxt)
+
 	if s.P != nil {
 		return
 	}
@@ -1903,6 +2099,7 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
 	var n int
 	var c int32
 	errors := ctxt.Errors
+	var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies).
 	for {
 		// This loop continues while there are reasons to re-assemble
 		// whole block, like the presence of long forward jumps.
@@ -1913,9 +2110,13 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
 		s.R = s.R[:0]
 		s.P = s.P[:0]
 		c = 0
+		var pPrev *obj.Prog
+		nops = nops[:0]
 		for p := s.Func.Text; p != nil; p = p.Link {
+			c0 := c
+			c = pjc.padJump(ctxt, s, p, c)
 
-			if (p.Back&branchLoopHead != 0) && c&(loopAlign-1) != 0 {
+			if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 {
 				// pad with NOPs
 				v := -c & (loopAlign - 1)
 
@@ -1954,11 +2155,21 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
 			m := ab.Len()
 			if int(p.Isize) != m {
 				p.Isize = uint8(m)
+				if pjc.reAssemble(p) {
+					// We need to re-assemble here to check for jumps and fused jumps
+					// that span or end on 32 byte boundaries.
+					reAssemble = true
+				}
 			}
 
 			s.Grow(p.Pc + int64(m))
 			copy(s.P[p.Pc:], ab.Bytes())
+			// If there was padding, remember it.
+			if pPrev != nil && !ctxt.IsAsm && c > c0 {
+				nops = append(nops, nopPad{p: pPrev, n: c - c0})
+			}
 			c += int32(m)
+			pPrev = p
 		}
 
 		n++
@@ -1973,6 +2184,12 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
 			return
 		}
 	}
+	// splice padding nops into Progs
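+	// Each pad becomes an explicit ANOP Prog whose Pc and Isize match the
+	// bytes already written, keeping the Prog list consistent with the
+	// emitted code; WithNotStmt keeps the padding from being marked as a
+	// statement in the line tables.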
+	for _, n := range nops {
+		pp := n.p
+		np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)}
+		pp.Link = np
+	}
 
 	s.Size = int64(c)