-
Notifications
You must be signed in to change notification settings - Fork 75
Extra memcpy when compiling with rustc_codegen_gcc #56
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Comments
It might be because the sysroot is not being compiled with optimizations yet. This commit in that PR was meant to allow compiling the sysroot with optimizations, but is breaking a hack. I have yet to look into fixing this. |
That does not seem to be the problem. Here's the GIMPLE for future reference: void _ZN9test_rust3foo17h9f0abfb59652170fE (signed long * param0)
{
struct struct undefined;
signed long dummyValueThatShouldNeverBeUsed;
struct struct undefined;
signed long dummyValueThatShouldNeverBeUsed;
struct struct undefined;
signed long dummyValueThatShouldNeverBeUsed;
struct struct undefined;
signed long * stack_var_4;
struct stack_var_3;
struct stack_var_2;
struct stack_var_1;
struct struct undefined;
struct struct undefined;
try
{
start:
stack_var_4.3_1 = &stack_var_4;
MEM[(signed long * *)stack_var_4.3_1] = param0;
_ZN9test_rust2SV3new17h38b8a107f2f9bb1aE (&stack_var_2);
dummyValueThatShouldNeverBeUsed = 0;
goto bb1;
bb1:
_ZN9test_rust2SV3new17h38b8a107f2f9bb1aE (&stack_var_3);
dummyValueThatShouldNeverBeUsed = 0;
goto bb2;
bb2:
memcpy (&stack_var_1, &stack_var_2, 336);
memcpy (&stack_var_1.field_3, &stack_var_3, 336);
_ZN9test_rust19Allocation_LT_T_GT_4init17h88034cc4566a9510E (param0, &stack_var_1);
dummyValueThatShouldNeverBeUsed = 0;
goto bb3;
bb3:
return;
}
finally
{
undefined = {CLOBBER};
undefined = {CLOBBER};
undefined = {CLOBBER};
undefined = {CLOBBER};
stack_var_4 = {CLOBBER};
stack_var_3 = {CLOBBER};
stack_var_2 = {CLOBBER};
stack_var_1 = {CLOBBER};
undefined = {CLOBBER};
undefined = {CLOBBER};
}
} while with void __GIMPLE (struct Allocation a, double g)
{
struct L s;
try
{
# DEBUG BEGIN_STMT
s.a = SV::make (); [return slot optimization]
s.b = SV::make (); [return slot optimization]
# DEBUG BEGIN_STMT
Allocation<L>::init (&a, s);
}
finally
{
s = _Literal (struct L) {CLOBBER};
}
} |
I think this is caused by bad handling of #![allow(deprecated)]
#[no_mangle]
unsafe extern "C" fn new_uninit() -> [usize; 40] {
std::mem::uninitialized()
} https://rust.godbolt.org/z/z6T68Yx7c For some reason the assembly contains two globals full of garbage: global_1_example_67fcd5eb_cgu_0:
.quad global_0_example_67fcd5eb_cgu_0
.byte 15
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 5
.byte 0
.byte 0
.byte 0
.byte 5
.byte 0
.byte 0
.byte 0
global_0_example_67fcd5eb_cgu_0:
.byte 47
.byte 97
.byte 112
.byte 112
.byte 47
.byte 101
.byte 120
.byte 97
.byte 109
.byte 112
.byte 108
.byte 101
.byte 46
.byte 114
.byte 115 Using #[no_mangle]
unsafe extern "C" fn new_uninit() -> std::mem::MaybeUninit<[usize; 40]> {
std::mem::MaybeUninit::uninit()
} https://rust.godbolt.org/z/3os7jG3ro new_uninit:
ret |
Right, |
Maybe it's just because inlining is not implemented yet? |
Even without inlining the |
The example in the original post now seems good locally (with a sysroot compiled in release mode, which is not the case on Compiler Explorer, which is currently broken, so it doesn't have the latest version). The __attribute__((visibility ("default")))
void new_uninit (size_t[40] * param0)
{
void * undefined;
long int * stack_var_2;
long int * stack_var_1;
struct stack_var_2;
struct stack_var_1;
try
{
start:
stack_var_1.0_1 = &stack_var_1;
MEM[(struct * *)stack_var_1.0_1] = &stack_var_1;
stack_var_2.1_2 = &stack_var_2;
MEM[(long int * *)stack_var_2.1_2] = &stack_var_1;
memset (&stack_var_1, 1, 320);
memcpy (&stack_var_2, &stack_var_1, 320);
memcpy (param0, &stack_var_2, 320);
return;
}
finally
{
stack_var_2 = {CLOBBER(eol)};
stack_var_1 = {CLOBBER(eol)};
stack_var_2 = {CLOBBER(eol)};
stack_var_1 = {CLOBBER(eol)};
}
} |
Or is it actually optimized? There are no call intructions or jumps in the cg_gcc version while cg_llvm calls |
When compiling the following there's an extra memcpy that LLVM is able to optimize away:
See: https://rust.godbolt.org/z/1d843PPnx
This test case is derived from rust-lang/rust#58082 which links to https://bugs.llvm.org/show_bug.cgi?id=40574 which suggests that GCC should be able to optimize something like this.
The text was updated successfully, but these errors were encountered: