Skip to content

[Perf] Windows/x86: 2 Improvements on 3/13/2023 6:11:08 PM #14042

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
performanceautofiler bot opened this issue Mar 14, 2023 · 1 comment
Closed

[Perf] Windows/x86: 2 Improvements on 3/13/2023 6:11:08 PM #14042

performanceautofiler bot opened this issue Mar 14, 2023 · 1 comment

Comments

@performanceautofiler
Copy link

performanceautofiler bot commented Mar 14, 2023

Run Information

Architecture x86
OS Windows 10.0.18362
Baseline 7e3bf878c105ea6114e56d3e6e3014c5480cb060
Compare 1c8d37af80667daffb3cb80ce0fe915621e8f039
Diff Diff

Improvements in StoreBlock.LocalAddress

Benchmark Baseline Test Test/Base Test Quality Edge Detector Baseline IR Compare IR IR Ratio Baseline ETL Compare ETL
CopyBlock128 - Duration of single invocation 11.17 ns 1.90 ns 0.17 0.00 False 8.122992473272928 11.374861093962217 1.4003288974339085 Trace Trace

graph
Test Report

Repro

General Docs link: https://github.com/dotnet/performance/blob/main/docs/benchmarking-workflow-dotnet-runtime.md

Payloads

Baseline
Compare

git clone https://github.com/dotnet/performance.git
py .\performance\scripts\benchmarks_ci.py -f net8.0 --filter 'StoreBlock.LocalAddress*'

Payloads

Baseline
Compare

Histogram

StoreBlock.LocalAddress.CopyBlock128


Description of detection logic

IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsRegressionBase: Marked as not a regression because the compare was not 5% greater than the baseline, or the value was too small.
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsImprovementWindowed: Marked as improvement because 1.896827572837044 < 10.61636497311516.
IsChangePoint: Marked as a change because one of 3/13/2023 2:14:21 PM, 3/14/2023 10:43:28 AM falls between 3/5/2023 5:50:49 PM and 3/14/2023 10:43:28 AM.
IsImprovementStdDev: Marked as improvement because 7299.225310165283 (T) = (0 -1.8967887299785873) / Math.Sqrt((0.00045510608243344824 / (299)) + (3.936085177711924E-07 / (4))) is greater than 1.9678765312856872 = MathNet.Numerics.Distributions.StudentT.InvCDF(0, 1, (299) + (4) - 2, .975) and 0.8304714299149188 = (11.188608085508223 - 1.8967887299785873) / 11.188608085508223 is greater than 0.05.
IsChangeEdgeDetector: Marked not as a regression because Edge Detector said so.

### Baseline Jit Disasm

```assembly
; StoreBlock.LocalAddress.CopyBlock128()
; Baseline codegen: every 0x80-byte block copy is done with `rep movsb`.
; Immediates and displacements in this listing are hexadecimal (note 7C),
; so 80 = 128 bytes, 64 = 100 iterations, 100 = 256 bytes of locals.
; Register roles as used below:
;   ecx (on entry) = base pointer; the copied block lives at [ecx+7C]
;                    (presumably `this` plus a field offset - TODO confirm)
;   eax            = address of that block, kept for the final copy-back
;   edx            = loop counter
;   esi/edi/ecx    = source / destination / byte count for rep movsb
       push      ebp
       mov       ebp,esp
; edi and esi are overwritten by the string copies, so they are saved here
; and restored in the epilogue.
       push      edi
       push      esi
; Reserve 0x100 bytes: two 0x80-byte stack buffers at [ebp-88] and [ebp-108]
; (both sit below the 8 bytes occupied by the saved edi/esi).
       sub       esp,100
       lea       eax,[ecx+7C]
; Copy 0x80 bytes from [eax] into the first stack buffer at [ebp-88].
       lea       edi,[ebp-88]
       mov       esi,eax
       mov       ecx,80
       rep movsb
; edx = 0: loop counter.
       xor       edx,edx
       nop
M00_L00:
; Loop body: copy 0x80 bytes from the first buffer [ebp-88] to the second
; buffer [ebp-108].
       lea       edi,[ebp-108]
       lea       esi,[ebp-88]
       mov       ecx,80
       rep movsb
; Repeat while edx < 0x64 (signed compare), i.e. 100 iterations in total.
       inc       edx
       cmp       edx,64
       jl        short M00_L00
; Copy 0x80 bytes from the second buffer back to the original address in eax.
       mov       edi,eax
       lea       esi,[ebp-108]
       mov       ecx,80
       rep movsb
; Drop the locals: point esp at the saved esi slot, then restore registers.
       lea       esp,[ebp-8]
       pop       esi
       pop       edi
       pop       ebp
       ret
; Total bytes of code 79

Compare Jit Disasm

; StoreBlock.LocalAddress.CopyBlock128()
; Compare codegen: each 0x80-byte block copy is unrolled into four 32-byte
; ymm load/store pairs (4 x 0x20 = 0x80) instead of `rep movsb`.
; Immediates and displacements are hexadecimal (note 7C, 0E0, 0C0, 0A0).
; Register roles as used below:
;   ecx  = address of the copied block (entry ecx + 7C; the entry value is
;          presumably `this` - TODO confirm)
;   eax  = loop counter
;   ymm0 = 32-byte transfer register
; Unlike the baseline listing, esi/edi are not used and so are not saved.
       push      ebp
       mov       ebp,esp
; Reserve 0x100 bytes: two 0x80-byte stack buffers at [ebp-80] and [ebp-100].
       sub       esp,100
       vzeroupper
       add       ecx,7C
; Copy 0x80 bytes from [ecx] into the first stack buffer at [ebp-80].
       vmovdqu   ymm0,ymmword ptr [ecx]
       vmovdqu   ymmword ptr [ebp-80],ymm0
       vmovdqu   ymm0,ymmword ptr [ecx+20]
       vmovdqu   ymmword ptr [ebp-60],ymm0
       vmovdqu   ymm0,ymmword ptr [ecx+40]
       vmovdqu   ymmword ptr [ebp-40],ymm0
       vmovdqu   ymm0,ymmword ptr [ecx+60]
       vmovdqu   ymmword ptr [ebp-20],ymm0
; eax = 0: loop counter.
       xor       eax,eax
; NOTE(review): the run of nops is presumably padding to align the loop
; head M00_L00 - the listing does not show addresses, so confirm.
       nop
       nop
       nop
       nop
       nop
       nop
       nop
       nop
M00_L00:
; Loop body: copy 0x80 bytes from the first buffer [ebp-80] to the second
; buffer [ebp-100].
       vmovdqu   ymm0,ymmword ptr [ebp-80]
       vmovdqu   ymmword ptr [ebp-100],ymm0
       vmovdqu   ymm0,ymmword ptr [ebp-60]
       vmovdqu   ymmword ptr [ebp-0E0],ymm0
       vmovdqu   ymm0,ymmword ptr [ebp-40]
       vmovdqu   ymmword ptr [ebp-0C0],ymm0
       vmovdqu   ymm0,ymmword ptr [ebp-20]
       vmovdqu   ymmword ptr [ebp-0A0],ymm0
; Repeat while eax < 0x64 (signed compare), i.e. 100 iterations in total.
       inc       eax
       cmp       eax,64
       jl        short M00_L00
; Copy 0x80 bytes from the second buffer back to the block at [ecx].
       vmovdqu   ymm0,ymmword ptr [ebp-100]
       vmovdqu   ymmword ptr [ecx],ymm0
       vmovdqu   ymm0,ymmword ptr [ebp-0E0]
       vmovdqu   ymmword ptr [ecx+20],ymm0
       vmovdqu   ymm0,ymmword ptr [ebp-0C0]
       vmovdqu   ymmword ptr [ecx+40],ymm0
       vmovdqu   ymm0,ymmword ptr [ebp-0A0]
       vmovdqu   ymmword ptr [ecx+60],ymm0
; Release the locals and restore the caller's frame pointer.
       mov       esp,ebp
       pop       ebp
       ret
; Total bytes of code 177

Docs

Profiling workflow for dotnet/runtime repository
Benchmarking workflow for dotnet/runtime repository

Run Information

Architecture x86
OS Windows 10.0.18362
Baseline 7e3bf878c105ea6114e56d3e6e3014c5480cb060
Compare 1c8d37af80667daffb3cb80ce0fe915621e8f039
Diff Diff

Improvements in StoreBlock.AnyLocation

Benchmark Baseline Test Test/Base Test Quality Edge Detector Baseline IR Compare IR IR Ratio Baseline ETL Compare ETL
CopyBlock128 - Duration of single invocation 12.28 ns 3.02 ns 0.25 0.00 False Trace Trace

graph
Test Report

Repro

General Docs link: https://github.com/dotnet/performance/blob/main/docs/benchmarking-workflow-dotnet-runtime.md

Payloads

Baseline
Compare

git clone https://github.com/dotnet/performance.git
py .\performance\scripts\benchmarks_ci.py -f net8.0 --filter 'StoreBlock.AnyLocation*'

Payloads

Baseline
Compare

Histogram

StoreBlock.AnyLocation.CopyBlock128


Description of detection logic

IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsRegressionBase: Marked as not a regression because the compare was not 5% greater than the baseline, or the value was too small.
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsImprovementWindowed: Marked as improvement because 3.0212941400685547 < 11.680166465794535.
IsChangePoint: Marked as a change because one of 3/13/2023 2:14:21 PM, 3/14/2023 10:43:28 AM falls between 3/5/2023 5:50:49 PM and 3/14/2023 10:43:28 AM.
IsImprovementStdDev: Marked as improvement because 2850.0975915399436 (T) = (0 -3.0224128232889456) / Math.Sqrt((0.0017321750189426277 / (299)) + (1.888854741800787E-05 / (4))) is greater than 1.9678765312856872 = MathNet.Numerics.Distributions.StudentT.InvCDF(0, 1, (299) + (4) - 2, .975) and 0.7535649059568582 = (12.264539005795301 - 3.0224128232889456) / 12.264539005795301 is greater than 0.05.
IsChangeEdgeDetector: Marked not as a regression because Edge Detector said so.

### Baseline Jit Disasm

```assembly
; StoreBlock.AnyLocation.CopyBlock128()
; Baseline codegen: walks an offset from 0 to 0x1000 in steps of 0x80 and,
; for each offset, copies 0x80 bytes between two heap objects with
; `rep movsb`. Immediates are hexadecimal (80 = 128, 1000 = 4096).
; Register roles as used below:
;   eax = copy of the entry ecx (presumably `this` - TODO confirm)
;   edx = current byte offset
;   ecx = scratch: object reference, then the rep movsb byte count
;   esi/edi = source / destination address for rep movsb
       push      ebp
       mov       ebp,esp
; edi and esi are overwritten by the string copy, so they are saved here.
       push      edi
       push      esi
; ecx is needed as the rep count, so the incoming pointer moves to eax.
       mov       eax,ecx
       xor       edx,edx
M00_L00:
; Destination: load the reference at [eax+8], compare the offset against the
; dword at [ecx+4] (unsigned), and branch to the range-check failure path
; when offset >= that value. Presumably [ecx+4] is the array length and +8
; the first-element offset - TODO confirm against the object layout.
       mov       ecx,[eax+8]
       cmp       edx,[ecx+4]
       jae       short M00_L01
       lea       edi,[ecx+edx+8]
; Source: same sequence for the reference at [eax+4].
       mov       ecx,[eax+4]
       cmp       edx,[ecx+4]
       jae       short M00_L01
       lea       esi,[ecx+edx+8]
; Copy 0x80 bytes from [esi] to [edi].
       mov       ecx,80
       rep movsb
; Advance by one block; loop while offset < 0x1000 (signed compare),
; i.e. 0x20 (32) iterations when no range check fails.
       add       edx,80
       cmp       edx,1000
       jl        short M00_L00
       pop       esi
       pop       edi
       pop       ebp
       ret
M00_L01:
; Range-check failure path reached from either `jae` above.
       call      CORINFO_HELP_RNGCHKFAIL
       int       3
; Total bytes of code 64

Compare Jit Disasm

; StoreBlock.AnyLocation.CopyBlock128()
; Compare codegen: same loop shape as the baseline listing, but the 0x80-byte
; copy is unrolled into four 32-byte ymm load/store pairs instead of
; `rep movsb`. Immediates are hexadecimal (80 = 128, 1000 = 4096).
; Register roles as used below:
;   ecx  = entry pointer, left untouched (presumably `this` - TODO confirm)
;   eax  = current byte offset
;   edx  = destination object reference, then destination address
;   esi  = source object reference, then source address
;   ymm0 = 32-byte transfer register
       push      ebp
       mov       ebp,esp
; Only esi is saved; edi is not used in this version.
       push      esi
       vzeroupper
       xor       eax,eax
M00_L00:
; Destination: load the reference at [ecx+8], compare the offset against the
; dword at [edx+4] (unsigned), and branch to the range-check failure path
; when offset >= that value. Presumably [edx+4] is the array length and +8
; the first-element offset - TODO confirm against the object layout.
       mov       edx,[ecx+8]
       cmp       eax,[edx+4]
       jae       short M00_L01
       lea       edx,[edx+eax+8]
; Source: same sequence for the reference at [ecx+4].
       mov       esi,[ecx+4]
       cmp       eax,[esi+4]
       jae       short M00_L01
       lea       esi,[esi+eax+8]
; Copy 0x80 bytes from [esi] to [edx], 0x20 bytes at a time.
       vmovdqu   ymm0,ymmword ptr [esi]
       vmovdqu   ymmword ptr [edx],ymm0
       vmovdqu   ymm0,ymmword ptr [esi+20]
       vmovdqu   ymmword ptr [edx+20],ymm0
       vmovdqu   ymm0,ymmword ptr [esi+40]
       vmovdqu   ymmword ptr [edx+40],ymm0
       vmovdqu   ymm0,ymmword ptr [esi+60]
       vmovdqu   ymmword ptr [edx+60],ymm0
; Advance by one block; loop while offset < 0x1000 (signed compare),
; i.e. 0x20 (32) iterations when no range check fails.
       add       eax,80
       cmp       eax,1000
       jl        short M00_L00
       pop       esi
       pop       ebp
       ret
M00_L01:
; Range-check failure path reached from either `jae` above.
       call      CORINFO_HELP_RNGCHKFAIL
       int       3
; Total bytes of code 92

Docs

Profiling workflow for dotnet/runtime repository
Benchmarking workflow for dotnet/runtime repository

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

2 participants