gnu: Add kafs-client
[jackhill/guix/guix.git] / gnu / packages / patches / llvm-9-fix-bitcast-miscompilation.patch
1 From f8e146f3430de3a6cd904f3f3f7aa1bfaefee14c Mon Sep 17 00:00:00 2001
2 From: Bjorn Pettersson <bjorn.a.pettersson@ericsson.com>
3 Date: Thu, 28 Nov 2019 23:18:28 +0100
4 Subject: [PATCH] [InstCombine] Fix big-endian miscompile of (bitcast
5 (zext/trunc (bitcast)))
6
7 Summary:
8 optimizeVectorResize is rewriting patterns like:
9 %1 = bitcast vector %src to integer
10 %2 = trunc/zext %1
11 %dst = bitcast %2 to vector
12
13 Since bitcasting between integer and vector types gives
14 different integer values depending on endianness, we need
15 to take endianness into account. As it happens the old
16 implementation only produced the correct result for little
17 endian targets.
18
19 Fixes: https://bugs.llvm.org/show_bug.cgi?id=44178
20
21 Reviewers: spatel, lattner, lebedev.ri
22
23 Reviewed By: spatel, lebedev.ri
24
25 Subscribers: lebedev.ri, hiraditya, uabelho, llvm-commits
26
27 Tags: #llvm
28
29 Differential Revision: https://reviews.llvm.org/D70844
30
31 (cherry picked from commit a9d6b0e5444741d08ff1df7cf71d1559e7fefc1f)
32 ---
33 .../InstCombine/InstCombineCasts.cpp | 79 +++++++++++++------
34 llvm/test/Transforms/InstCombine/cast.ll | 6 +-
35 2 files changed, 60 insertions(+), 25 deletions(-)
36
37 diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
38 index 2c9ba203fbf3..0af3de300e77 100644
39 --- llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
40 +++ llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
41 @@ -18,6 +18,7 @@
42 #include "llvm/IR/DIBuilder.h"
43 #include "llvm/IR/PatternMatch.h"
44 #include "llvm/Support/KnownBits.h"
45 +#include <numeric>
46 using namespace llvm;
47 using namespace PatternMatch;
48
49 @@ -1820,12 +1821,24 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
50 }
51
52 /// This input value (which is known to have vector type) is being zero extended
53 -/// or truncated to the specified vector type.
54 +/// or truncated to the specified vector type. Since the zext/trunc is done
55 +/// using an integer type, we have a (bitcast(cast(bitcast))) pattern, and
56 +/// endianness will impact which end of the vector is extended or
57 +/// truncated.
58 +///
59 +/// A vector is always stored with index 0 at the lowest address, which
60 +/// corresponds to the most significant bits for a big endian stored integer and
61 +/// the least significant bits for little endian. A trunc/zext of an integer
62 +/// impacts the big end of the integer. Thus, we need to add/remove elements at
63 +/// the front of the vector for big endian targets, and the back of the vector
64 +/// for little endian targets.
65 +///
66 /// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
67 ///
68 /// The source and destination vector types may have different element types.
69 -static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy,
70 - InstCombiner &IC) {
71 +static Instruction *optimizeVectorResizeWithIntegerBitCasts(Value *InVal,
72 + VectorType *DestTy,
73 + InstCombiner &IC) {
74 // We can only do this optimization if the output is a multiple of the input
75 // element size, or the input is a multiple of the output element size.
76 // Convert the input type to have the same element type as the output.
77 @@ -1844,31 +1857,53 @@ static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy,
78 InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
79 }
80
81 + bool IsBigEndian = IC.getDataLayout().isBigEndian();
82 + unsigned SrcElts = SrcTy->getNumElements();
83 + unsigned DestElts = DestTy->getNumElements();
84 +
85 + assert(SrcElts != DestElts && "Element counts should be different.");
86 +
87 // Now that the element types match, get the shuffle mask and RHS of the
88 // shuffle to use, which depends on whether we're increasing or decreasing the
89 // size of the input.
90 - SmallVector<uint32_t, 16> ShuffleMask;
91 + SmallVector<uint32_t, 16> ShuffleMaskStorage;
92 + ArrayRef<uint32_t> ShuffleMask;
93 Value *V2;
94
95 - if (SrcTy->getNumElements() > DestTy->getNumElements()) {
96 - // If we're shrinking the number of elements, just shuffle in the low
97 - // elements from the input and use undef as the second shuffle input.
98 - V2 = UndefValue::get(SrcTy);
99 - for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i)
100 - ShuffleMask.push_back(i);
101 + // Produce an identity shuffle mask for the src vector.
102 + ShuffleMaskStorage.resize(SrcElts);
103 + std::iota(ShuffleMaskStorage.begin(), ShuffleMaskStorage.end(), 0);
104
105 + if (SrcElts > DestElts) {
106 + // If we're shrinking the number of elements (rewriting an integer
107 + // truncate), just shuffle in the elements corresponding to the least
108 + // significant bits from the input and use undef as the second shuffle
109 + // input.
110 + V2 = UndefValue::get(SrcTy);
111 + // Make sure the shuffle mask selects the "least significant bits" by
112 + // keeping elements from back of the src vector for big endian, and from the
113 + // front for little endian.
114 + ShuffleMask = ShuffleMaskStorage;
115 + if (IsBigEndian)
116 + ShuffleMask = ShuffleMask.take_back(DestElts);
117 + else
118 + ShuffleMask = ShuffleMask.take_front(DestElts);
119 } else {
120 - // If we're increasing the number of elements, shuffle in all of the
121 - // elements from InVal and fill the rest of the result elements with zeros
122 - // from a constant zero.
123 + // If we're increasing the number of elements (rewriting an integer zext),
124 + // shuffle in all of the elements from InVal. Fill the rest of the result
125 + // elements with zeros from a constant zero.
126 V2 = Constant::getNullValue(SrcTy);
127 - unsigned SrcElts = SrcTy->getNumElements();
128 - for (unsigned i = 0, e = SrcElts; i != e; ++i)
129 - ShuffleMask.push_back(i);
130 -
131 - // The excess elements reference the first element of the zero input.
132 - for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i)
133 - ShuffleMask.push_back(SrcElts);
134 + // Use first elt from V2 when indicating zero in the shuffle mask.
135 + uint32_t NullElt = SrcElts;
136 + // Extend with null values in the "most significant bits" by adding elements
137 + // in front of the src vector for big endian, and at the back for little
138 + // endian.
139 + unsigned DeltaElts = DestElts - SrcElts;
140 + if (IsBigEndian)
141 + ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
142 + else
143 + ShuffleMaskStorage.append(DeltaElts, NullElt);
144 + ShuffleMask = ShuffleMaskStorage;
145 }
146
147 return new ShuffleVectorInst(InVal, V2,
148 @@ -2359,8 +2394,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
149 CastInst *SrcCast = cast<CastInst>(Src);
150 if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
151 if (isa<VectorType>(BCIn->getOperand(0)->getType()))
152 - if (Instruction *I = optimizeVectorResize(BCIn->getOperand(0),
153 - cast<VectorType>(DestTy), *this))
154 + if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts(
155 + BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
156 return I;
157 }
158
159 diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll
160 index b6d1eda0601d..3ce8de033422 100644
161 --- llvm/test/Transforms/InstCombine/cast.ll
162 +++ llvm/test/Transforms/InstCombine/cast.ll
163 @@ -824,7 +824,7 @@ define i64 @test59(i8 %A, i8 %B) {
164
165 define <3 x i32> @test60(<4 x i32> %call4) {
166 ; CHECK-LABEL: @test60(
167 -; CHECK-NEXT: [[P10:%.*]] = shufflevector <4 x i32> [[CALL4:%.*]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
168 +; CHECK-NEXT: [[P10:%.*]] = shufflevector <4 x i32> [[CALL4:%.*]], <4 x i32> undef, <3 x i32> <i32 1, i32 2, i32 3>
169 ; CHECK-NEXT: ret <3 x i32> [[P10]]
170 ;
171 %p11 = bitcast <4 x i32> %call4 to i128
172 @@ -836,7 +836,7 @@ define <3 x i32> @test60(<4 x i32> %call4) {
173
174 define <4 x i32> @test61(<3 x i32> %call4) {
175 ; CHECK-LABEL: @test61(
176 -; CHECK-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[CALL4:%.*]], <3 x i32> <i32 0, i32 undef, i32 undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
177 +; CHECK-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[CALL4:%.*]], <3 x i32> <i32 0, i32 undef, i32 undef>, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
178 ; CHECK-NEXT: ret <4 x i32> [[P10]]
179 ;
180 %p11 = bitcast <3 x i32> %call4 to i96
181 @@ -848,7 +848,7 @@ define <4 x i32> @test61(<3 x i32> %call4) {
182 define <4 x i32> @test62(<3 x float> %call4) {
183 ; CHECK-LABEL: @test62(
184 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <3 x float> [[CALL4:%.*]] to <3 x i32>
185 -; CHECK-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> <i32 0, i32 undef, i32 undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
186 +; CHECK-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> <i32 0, i32 undef, i32 undef>, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
187 ; CHECK-NEXT: ret <4 x i32> [[P10]]
188 ;
189 %p11 = bitcast <3 x float> %call4 to i96
190 --
191 2.26.2
192