Skip to content

inlining reduces performance compared to manual inlining #35687

Closed
@JAicewizard

Description

@JAicewizard
$ go version
1.13.4

Does this issue reproduce with the latest release?

yes

What operating system and processor architecture are you using (go env)?

go env Output
$ go env
GO111MODULE=""
GOARCH="amd64"
GOBIN=""
GOCACHE="/home/jaap/.cache/go-build"
GOENV="/home/jaap/.config/go/env"
GOEXE=""
GOFLAGS=""
GOHOSTARCH="amd64"
GOHOSTOS="linux"
GONOPROXY=""
GONOSUMDB=""
GOOS="linux"
GOPATH="/home/jaap/go"
GOPRIVATE=""
GOPROXY="https://proxy.golang.org,direct"
GOROOT="/usr/lib/go"
GOSUMDB="sum.golang.org"
GOTMPDIR=""
GOTOOLDIR="/usr/lib/go/pkg/tool/linux_amd64"
GCCGO="gccgo"
AR="ar"
CC="gcc"
CXX="g++"
CGO_ENABLED="1"
GOMOD="/home/jaap/go-projects/generator/go.mod"
CGO_CFLAGS="-g -O2"
CGO_CPPFLAGS=""
CGO_CXXFLAGS="-g -O2"
CGO_FFLAGS="-g -O2"
CGO_LDFLAGS="-g -O2"
PKG_CONFIG="pkg-config"
GOGCCFLAGS="-fPIC -m64 -pthread -fmessage-length=0 -fdebug-prefix-map=/tmp/go-build995427981=/tmp/go-build -gno-record-gcc-switches"

What did you do?

package main

type Week [5][9]timeslot // fixed 5x9 grid of timeslot values; main indexes it as [day][period]

type timeslot struct { // one cell of a Week
	Score      float32 `json:"score"` // read by main and compared against averageScore
	Percentage float32 `json:"percentage"` // written by main when Score is below averageScore
	Reasons    []int   `json:"reasons"` // not touched by this reproducer
}
type timeid struct { // lookup key; this reproducer only sets and reads Day and Period
	ScheduleID string `json:"schedule_id"` // unused in this reproducer
	Week       uint8  `json:"week"` // unused in this reproducer
	Day        int    `json:"day"` // first index into a Week
	Period     int    `json:"period"` // second index into a Week
	Lesson     int    `json:"lesson"` // unused in this reproducer
}

func (w Week) getPeriod(time timeid) timeslot { // value receiver: w is passed by value, as is time
	return w[time.Day][time.Period] // only the Day and Period fields of time are read
}

func main() { // reproducer: walks every slot of a Week and sets Percentage where Score is below averageScore
	week := Week{} // zero value, so every Score starts at 0
	time := timeid{}
	averageScore := float32(5)
	totalDistanceBottom := float32(5)
	for day := range week {
		time.Day = day
		for period := range week[day] {
			time.Period = period
			// Manually inlined lookup; swap with the commented-out getPeriod call below to compare the generated code.
			score := week[time.Day][time.Period].Score
			//score := week.getPeriod(time).Score
			if score < averageScore {
				distance := averageScore - score
				week[day][period].Percentage = -distance / totalDistanceBottom // negated distance as a fraction of totalDistanceBottom
			}
		}
	}
}

What did you expect to see?

The code generated for a manually inlined function call, without a lot of moves.
Note that this does have bounds checks later on in the asm.

	0x0069 00105 ($PWD/main.go:32)	MOVQ	"".time+40(SP), DX
	0x006e 00110 ($PWD/main.go:32)	CMPQ	DX, $5
	0x0072 00114 ($PWD/main.go:32)	JCC	254
	0x0078 00120 ($PWD/main.go:32)	LEAQ	(DX)(DX*8), DX
	0x007c 00124 ($PWD/main.go:32)	SHLQ	$5, DX
	0x0080 00128 ($PWD/main.go:32)	PCDATA	$0, $2
	0x0080 00128 ($PWD/main.go:32)	LEAQ	"".week+64(SP)(DX*1), DX
	0x0085 00133 ($PWD/main.go:32)	MOVQ	CX, BX
	0x0088 00136 ($PWD/main.go:32)	SHLQ	$5, CX
	0x008c 00140 ($PWD/main.go:32)	PCDATA	$0, $0
	0x008c 00140 ($PWD/main.go:32)	MOVSS	(DX)(CX*1), X0

What did you see instead?

An automatically inlined function call with a lot of moves.

	0x006d 00109 ($PWD/main.go:33)	PCDATA	$0, $1
	0x006d 00109 ($PWD/main.go:33)	PCDATA	$1, $3
	0x006d 00109 ($PWD/main.go:33)	LEAQ	"".w+1552(SP), DI
	0x0075 00117 ($PWD/main.go:33)	PCDATA	$0, $2
	0x0075 00117 ($PWD/main.go:33)	LEAQ	"".week+112(SP), SI
	0x007a 00122 ($PWD/main.go:29)	MOVQ	CX, DX
	0x007d 00125 ($PWD/main.go:33)	MOVL	$180, CX
	0x0082 00130 ($PWD/main.go:33)	PCDATA	$0, $0
	0x0082 00130 ($PWD/main.go:33)	REP
	0x0083 00131 ($PWD/main.go:33)	MOVSQ
	0x0085 00133 ($PWD/main.go:33)	PCDATA	$1, $4
	0x0085 00133 ($PWD/main.go:33)	MOVUPS	"".time+64(SP), X0
	0x008a 00138 ($PWD/main.go:33)	MOVUPS	X0, "".time+16(SP)
	0x008f 00143 ($PWD/main.go:33)	MOVUPS	"".time+80(SP), X0
	0x0094 00148 ($PWD/main.go:33)	MOVUPS	X0, "".time+32(SP)
	0x0099 00153 ($PWD/main.go:33)	MOVUPS	"".time+96(SP), X0
	0x009e 00158 ($PWD/main.go:33)	MOVUPS	X0, "".time+48(SP)

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions