From 5c54d80700872d5dd5759c2368937f06bef7b7db Mon Sep 17 00:00:00 2001
From: Carl Peto <carl.peto@me.com>
Date: Wed, 30 Jan 2019 11:19:26 +0000
Subject: [PATCH 1/3] Create an overridable hook into TargetLoweringInfo for
 ABI calling convention endianness as it does not always match the overall
 endianness. When an argument is too large to be legalised by the architecture
 and is split for the ABI, we may need flexibility how we order the resulting
 parameters. (alternative approach recommended by Eli Friedman)

---
 include/llvm/CodeGen/TargetLowering.h         |  9 ++++
 lib/CodeGen/SelectionDAG/LegalizeDAG.cpp      |  2 +-
 lib/Target/AVR/AVRISelLowering.h              |  5 ++
 .../CodeGen/AVR/umul.with.overflow.i16-bug.ll | 52 +++++++++++++++++++
 4 files changed, 67 insertions(+), 1 deletion(-)
 create mode 100644 test/CodeGen/AVR/umul.with.overflow.i16-bug.ll

diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h
index 38e575b1360f..fe97fecc444c 100644
--- a/include/llvm/CodeGen/TargetLowering.h
+++ b/include/llvm/CodeGen/TargetLowering.h
@@ -3358,6 +3358,15 @@ class TargetLowering : public TargetLoweringBase {
     return false;
   }
 
+  /// For some targets, an LLVM type must be broken down into multiple
+  /// smaller types. Usually the halves are ordered according to the endianness
+  /// but for some platform that would break. So this method will default to
+  /// matching the endianness but can be overridden.
+  virtual bool
+  functionArgumentsSplitLittleEndian(const DataLayout &DL) const {
+    return DL.isLittleEndian();
+  }
+
   /// Returns a 0 terminated array of registers that can be safely used as
   /// scratch registers.
   virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index dcb479e4ce1f..0dd7226d2cb0 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3398,7 +3398,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
       // being a legal type for the architecture and thus has to be split to
       // two arguments.
       SDValue Ret;
-      if(DAG.getDataLayout().isLittleEndian()) {
+      if(TLI.functionArgumentsSplitLittleEndian(DAG.getDataLayout())) {
         // Halves of WideVT are packed into registers in different order
         // depending on platform endianness. This is usually handled by
         // the C calling convention, but we can't defer to it in
diff --git a/lib/Target/AVR/AVRISelLowering.h b/lib/Target/AVR/AVRISelLowering.h
index 7d77dd8fb018..7878047d3452 100644
--- a/lib/Target/AVR/AVRISelLowering.h
+++ b/lib/Target/AVR/AVRISelLowering.h
@@ -129,6 +129,11 @@ class AVRTargetLowering : public TargetLowering {
   unsigned getRegisterByName(const char* RegName, EVT VT,
                              SelectionDAG &DAG) const override;
 
+  bool functionArgumentsSplitLittleEndian(const DataLayout &DL)
+    const override {
+      return false;
+  }
+
 private:
   SDValue getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AVRcc,
                     SelectionDAG &DAG, SDLoc dl) const;
diff --git a/test/CodeGen/AVR/umul.with.overflow.i16-bug.ll b/test/CodeGen/AVR/umul.with.overflow.i16-bug.ll
new file mode 100644
index 000000000000..853d79405160
--- /dev/null
+++ b/test/CodeGen/AVR/umul.with.overflow.i16-bug.ll
@@ -0,0 +1,52 @@
+; RUN: llc -O1 < %s -march=avr | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9"
+
+%Vs6UInt16 = type <{ i16 }>
+%Sb = type <{ i1 }>
+
+define hidden void @_TF4main13setServoAngleFT5angleVs6UInt16_T_(i16) #0 {
+  ; CHECK-LABEL: entry
+entry:
+  %adjustedAngle = alloca %Vs6UInt16, align 2
+  %1 = bitcast %Vs6UInt16* %adjustedAngle to i8*
+  %adjustedAngle._value = getelementptr inbounds %Vs6UInt16, %Vs6UInt16* %adjustedAngle, i32 0, i32 0
+  store i16 %0, i16* %adjustedAngle._value, align 2
+
+;print(unsignedInt: adjustedAngle &* UInt16(11))
+; breaks here
+  %adjustedAngle._value2 = getelementptr inbounds %Vs6UInt16, %Vs6UInt16* %adjustedAngle, i32 0, i32 0
+  %2 = load i16, i16* %adjustedAngle._value2, align 2
+
+; CHECK: mov r22, r24
+; CHECK: mov r23, r25
+
+; CHECK-DAG: ldi r20, 0
+; CHECK-DAG: ldi r21, 0
+; CHECK-DAG: ldi r18, 11
+; CHECK-DAG: ldi r19, 0
+
+; CHECK: mov r24, r20
+; CHECK: mov r25, r21
+; CHECK: call  __mulsi3
+  %3 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 %2, i16 11)
+  %4 = extractvalue { i16, i1 } %3, 0
+  %5 = extractvalue { i16, i1 } %3, 1
+
+  ; above code looks fine, how is it lowered?
+  %6 = call i1 @_TIF3AVR5printFT11unsignedIntVs6UInt1610addNewlineSb_T_A0_()
+  call void @_TF3AVR5printFT11unsignedIntVs6UInt1610addNewlineSb_T_(i16 %4, i1 %6)
+
+; CHECK: ret
+  ret void
+}
+
+declare void @_TF3AVR5printFT11unsignedIntVs6UInt1610addNewlineSb_T_(i16, i1) #0
+declare i1 @_TIF3AVR5printFT11unsignedIntVs6UInt1610addNewlineSb_T_A0_() #0
+
+; Function Attrs: nounwind readnone speculatable
+declare { i16, i1 } @llvm.umul.with.overflow.i16(i16, i16) #2
+
+attributes #0 = { "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "target-cpu"="core2" "target-features"="+ssse3,+cx16,+fxsr,+mmx,+x87,+sse,+sse2,+sse3" }
+attributes #2 = { nounwind readnone speculatable }

From d0f9139a13f3ce70b9001d5dfd0b4b005f777ec0 Mon Sep 17 00:00:00 2001
From: Carl Peto <carl.peto@me.com>
Date: Tue, 2 Apr 2019 23:16:55 +0100
Subject: [PATCH 2/3] changes suggested in PR

---
 lib/Target/AVR/AVRISelLowering.h               |  2 +-
 test/CodeGen/AVR/umul.with.overflow.i16-bug.ll | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib/Target/AVR/AVRISelLowering.h b/lib/Target/AVR/AVRISelLowering.h
index 7878047d3452..9765ce4bc984 100644
--- a/lib/Target/AVR/AVRISelLowering.h
+++ b/lib/Target/AVR/AVRISelLowering.h
@@ -131,7 +131,7 @@ class AVRTargetLowering : public TargetLowering {
 
   bool functionArgumentsSplitLittleEndian(const DataLayout &DL)
     const override {
-      return false;
+    return false;
   }
 
 private:
diff --git a/test/CodeGen/AVR/umul.with.overflow.i16-bug.ll b/test/CodeGen/AVR/umul.with.overflow.i16-bug.ll
index 853d79405160..d287c2702cd2 100644
--- a/test/CodeGen/AVR/umul.with.overflow.i16-bug.ll
+++ b/test/CodeGen/AVR/umul.with.overflow.i16-bug.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-apple-macosx10.9"
 %Vs6UInt16 = type <{ i16 }>
 %Sb = type <{ i1 }>
 
-define hidden void @_TF4main13setServoAngleFT5angleVs6UInt16_T_(i16) #0 {
+define hidden void @setServoAngle(i16) #0 {
   ; CHECK-LABEL: entry
 entry:
   %adjustedAngle = alloca %Vs6UInt16, align 2
@@ -35,15 +35,15 @@ entry:
   %5 = extractvalue { i16, i1 } %3, 1
 
   ; above code looks fine, how is it lowered?
-  %6 = call i1 @_TIF3AVR5printFT11unsignedIntVs6UInt1610addNewlineSb_T_A0_()
-  call void @_TF3AVR5printFT11unsignedIntVs6UInt1610addNewlineSb_T_(i16 %4, i1 %6)
+  %6 = call i1 @printDefaultParam()
+  call void @print(i16 %4, i1 %6)
 
 ; CHECK: ret
   ret void
 }
 
-declare void @_TF3AVR5printFT11unsignedIntVs6UInt1610addNewlineSb_T_(i16, i1) #0
-declare i1 @_TIF3AVR5printFT11unsignedIntVs6UInt1610addNewlineSb_T_A0_() #0
+declare void @print(i16, i1) #0
+declare i1 @printDefaultParam() #0
 
 ; Function Attrs: nounwind readnone speculatable
 declare { i16, i1 } @llvm.umul.with.overflow.i16(i16, i16) #2

From ebf0947ac4df0aa31d68df5d74b9285fe9b15e9e Mon Sep 17 00:00:00 2001
From: Carl Peto <carl.peto@me.com>
Date: Tue, 2 Apr 2019 23:36:14 +0100
Subject: [PATCH 3/3] PR changes: - more test cleanup - name change

---
 include/llvm/CodeGen/TargetLowering.h          |  2 +-
 lib/CodeGen/SelectionDAG/LegalizeDAG.cpp       |  2 +-
 lib/Target/AVR/AVRISelLowering.h               |  2 +-
 test/CodeGen/AVR/umul.with.overflow.i16-bug.ll | 11 ++++-------
 4 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h
index fe97fecc444c..36f1b5b1c2ea 100644
--- a/include/llvm/CodeGen/TargetLowering.h
+++ b/include/llvm/CodeGen/TargetLowering.h
@@ -3363,7 +3363,7 @@ class TargetLowering : public TargetLoweringBase {
   /// but for some platform that would break. So this method will default to
   /// matching the endianness but can be overridden.
   virtual bool
-  functionArgumentsSplitLittleEndian(const DataLayout &DL) const {
+  shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const {
     return DL.isLittleEndian();
   }
 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 0dd7226d2cb0..956bd51fb88b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3398,7 +3398,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
       // being a legal type for the architecture and thus has to be split to
       // two arguments.
       SDValue Ret;
-      if(TLI.functionArgumentsSplitLittleEndian(DAG.getDataLayout())) {
+      if(TLI.shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
         // Halves of WideVT are packed into registers in different order
         // depending on platform endianness. This is usually handled by
         // the C calling convention, but we can't defer to it in
diff --git a/lib/Target/AVR/AVRISelLowering.h b/lib/Target/AVR/AVRISelLowering.h
index 9765ce4bc984..a3f8cec56a6e 100644
--- a/lib/Target/AVR/AVRISelLowering.h
+++ b/lib/Target/AVR/AVRISelLowering.h
@@ -129,7 +129,7 @@ class AVRTargetLowering : public TargetLowering {
   unsigned getRegisterByName(const char* RegName, EVT VT,
                              SelectionDAG &DAG) const override;
 
-  bool functionArgumentsSplitLittleEndian(const DataLayout &DL)
+  bool shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL)
     const override {
     return false;
   }
diff --git a/test/CodeGen/AVR/umul.with.overflow.i16-bug.ll b/test/CodeGen/AVR/umul.with.overflow.i16-bug.ll
index d287c2702cd2..4ea8f32b61d1 100644
--- a/test/CodeGen/AVR/umul.with.overflow.i16-bug.ll
+++ b/test/CodeGen/AVR/umul.with.overflow.i16-bug.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-apple-macosx10.9"
 %Vs6UInt16 = type <{ i16 }>
 %Sb = type <{ i1 }>
 
-define hidden void @setServoAngle(i16) #0 {
+define hidden void @setServoAngle(i16) {
   ; CHECK-LABEL: entry
 entry:
   %adjustedAngle = alloca %Vs6UInt16, align 2
@@ -42,11 +42,8 @@ entry:
   ret void
 }
 
-declare void @print(i16, i1) #0
-declare i1 @printDefaultParam() #0
+declare void @print(i16, i1)
+declare i1 @printDefaultParam()
 
 ; Function Attrs: nounwind readnone speculatable
-declare { i16, i1 } @llvm.umul.with.overflow.i16(i16, i16) #2
-
-attributes #0 = { "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "target-cpu"="core2" "target-features"="+ssse3,+cx16,+fxsr,+mmx,+x87,+sse,+sse2,+sse3" }
-attributes #2 = { nounwind readnone speculatable }
+declare { i16, i1 } @llvm.umul.with.overflow.i16(i16, i16)