Skip to content

Commit 3e0cdf3

Browse files
committed
Upgrade a rdar://5907648 link to a github issue
#64174
1 parent 80fae88 commit 3e0cdf3

File tree

2 files changed

+25
-31
lines changed

2 files changed

+25
-31
lines changed

llvm/lib/Target/X86/README-SSE.txt

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -680,37 +680,6 @@ _t:
680680
shufps $132, %xmm2, %xmm0
681681
movaps %xmm0, 0
682682

683-
//===---------------------------------------------------------------------===//
684-
rdar://5907648
685-
686-
This function:
687-
688-
float foo(unsigned char x) {
689-
return x;
690-
}
691-
692-
is lowered to this LLVM IR (targeting x86-32):
693-
694-
define float @foo(i8 zeroext %x) nounwind {
695-
%tmp12 = uitofp i8 %x to float ; <float> [#uses=1]
696-
ret float %tmp12
697-
}
698-
699-
compiles to:
700-
701-
_foo:
702-
subl $4, %esp
703-
movzbl 8(%esp), %eax
704-
cvtsi2ss %eax, %xmm0
705-
movss %xmm0, (%esp)
706-
flds (%esp)
707-
addl $4, %esp
708-
ret
709-
710-
We should be able to use:
711-
cvtsi2ss 8(%esp), %xmm0
712-
since we know the stack slot is already zext'd.
713-
714683
//===---------------------------------------------------------------------===//
715684

716685
Consider using movlps instead of movsd to implement (scalar_to_vector (loadf64))

llvm/test/CodeGen/X86/int8-to-fp.ll

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=i386-apple-macosx -mattr=+sse2 | FileCheck %s --check-prefix=X86
3+
; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse2 | FileCheck %s --check-prefix=X64
4+
5+
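; We get this right for x86-64, but on x86-32 the code is suboptimal: the already zero-extended stack argument is loaded with movzbl instead of being folded into cvtsi2ss as a memory operand.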
6+
; See: https://github.com/llvm/llvm-project/issues/64174
7+
define noundef float @i8_to_fp(i8 noundef zeroext %0) {
8+
; X86-LABEL: i8_to_fp:
9+
; X86: ## %bb.0:
10+
; X86-NEXT: pushl %eax
11+
; X86-NEXT: .cfi_def_cfa_offset 8
12+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
13+
; X86-NEXT: cvtsi2ss %eax, %xmm0
14+
; X86-NEXT: movss %xmm0, (%esp)
15+
; X86-NEXT: flds (%esp)
16+
; X86-NEXT: popl %eax
17+
; X86-NEXT: retl
18+
;
19+
; X64-LABEL: i8_to_fp:
20+
; X64: ## %bb.0:
21+
; X64-NEXT: cvtsi2ss %edi, %xmm0
22+
; X64-NEXT: retq
23+
%2 = uitofp i8 %0 to float
24+
ret float %2
25+
}

0 commit comments

Comments
 (0)