-
Notifications
You must be signed in to change notification settings - Fork 15.8k
Description
| Bugzilla Link | 47114 |
| Version | trunk |
| OS | All |
| CC | @comex,@CryZe,@davidbolvansky,@dtolnay,@jplatte,@nelhage,@rotateright |
Extended Description
Extracted from: rust-lang/rust#74267
Given the following C code, the b and c functions are behaviorally identical (https://godbolt.org/z/5eKxE5):
#include <stdlib.h>
#include <stdint.h>
#define N 2
/* Plain record of two size_t fields and one byte pointer.
 * NOTE(review): presumably modelled on a string/vector header, since the
 * reproducer was extracted from a Rust issue -- confirm against that report. */
typedef struct {
size_t length;
size_t capacity;
uint8_t* data;
} String;
/* Returns a String with both size fields set to 0 and data set to NULL.
 * Declared static, so it is only visible within this translation unit. */
static String new_string() {
String s = {0, 0, NULL};
return s;
}
/* Fixed-size array of N String values wrapped in a struct so that it can be
 * returned by value from b() and c(). */
struct Arr {
String data[N];
};
/* Builds an Arr in a local variable by assigning new_string() to each of the
 * N elements, indexing the array member directly, and returns it by value. */
struct Arr b() {
struct Arr data;
for (size_t i = 0; i < N; i++) {
data.data[i] = new_string();
}
return data;
}
/* Wrapper whose only member is an Arr; c() builds its result inside one of
 * these and returns just the inner `value`. */
struct PartialArr {
struct Arr value;
};
/* Produces the same Arr contents as b(), but by a different route: the Arr
 * lives inside a local PartialArr, and the elements are written through
 * `slots`, a pointer to the whole String[N] array, rather than by indexing
 * the member directly. Only the inner `value` is returned. */
struct Arr c() {
struct PartialArr data;
/* Pointer to the entire array (type String (*)[N]), not to its first element. */
String (*slots)[N] = &data.value.data;
for (size_t i = 0; i < N; i++) {
(*slots)[i] = new_string();
}
return data.value;
}
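However, they end up optimized very differently: b writes the zeros straight into the return buffer, while c first builds the value on the stack and then copies it out piece by piece: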
# Output for b(): every store goes directly through rdi, with no stack traffic.
# NOTE(review): rdi is presumably the caller-provided result buffer; the code
# writes 48 bytes through it and hands the same pointer back in rax.
b: # @b
mov rax, rdi
# xmm0 = 0, reused for both 16-byte stores below.
vxorps xmm0, xmm0, xmm0
# Bytes 0..23: a 16-byte zero store followed by an 8-byte zero store.
vmovups xmmword ptr [rdi], xmm0
mov qword ptr [rdi + 16], 0
# Bytes 24..47: the same pattern, 24 bytes further on.
vmovups xmmword ptr [rdi + 24], xmm0
mov qword ptr [rdi + 40], 0
ret
# Output for c(): the zeros are first written to a 48-byte scratch area at
# [rsp - 56 .. rsp - 9] and then copied piecewise through rdi.
# NOTE(review): rsp is never adjusted here, so the scratch area is presumably
# the System V red zone -- confirm the target ABI.
c: # @c
vxorps xmm0, xmm0, xmm0
# Phase 1: zero the scratch copy (16 + 8 + 16 + 8 bytes).
vmovaps xmmword ptr [rsp - 56], xmm0
mov qword ptr [rsp - 40], 0
vmovups xmmword ptr [rsp - 32], xmm0
mov rax, rdi
mov qword ptr [rsp - 16], 0
# Phase 2: copy the scratch area out through rdi.
vmovups xmm0, xmmword ptr [rsp - 56]
vmovups xmmword ptr [rdi], xmm0
mov rcx, qword ptr [rsp - 40]
mov qword ptr [rdi + 16], rcx
# This 8-byte copy to [rdi + 24] is covered again by the 16-byte store below.
mov rcx, qword ptr [rsp - 32]
mov qword ptr [rdi + 24], rcx
# Repeats the load/store pair for [rdi + 16] already performed above.
mov rcx, qword ptr [rsp - 40]
mov qword ptr [rdi + 16], rcx
vmovups xmm0, xmmword ptr [rsp - 32]
vmovups xmmword ptr [rdi + 24], xmm0
mov rcx, qword ptr [rsp - 16]
mov qword ptr [rdi + 40], rcx
ret
GCC is able to optimize this better, emitting identical code for both functions:
# GCC output for b(): six 8-byte zero stores covering offsets 0..47 from rdi,
# written directly through rdi; the same pointer is then returned in rax.
b:
mov QWORD PTR [rdi], 0
mov QWORD PTR [rdi+8], 0
mov QWORD PTR [rdi+16], 0
mov QWORD PTR [rdi+24], 0
mov QWORD PTR [rdi+32], 0
mov QWORD PTR [rdi+40], 0
mov rax, rdi
ret
# GCC output for c(): instruction-for-instruction the same as its output for
# b() -- six 8-byte zero stores through rdi and no intermediate stack copy.
c:
mov QWORD PTR [rdi], 0
mov QWORD PTR [rdi+8], 0
mov QWORD PTR [rdi+16], 0
mov QWORD PTR [rdi+24], 0
mov QWORD PTR [rdi+32], 0
mov QWORD PTR [rdi+40], 0
mov rax, rdi
ret