gnu/packages/patches/llvm-9-fix-bitcast-miscompilation.patch

   1 From f8e146f3430de3a6cd904f3f3f7aa1bfaefee14c Mon Sep 17 00:00:00 2001
   2 From: Bjorn Pettersson <bjorn.a.pettersson@ericsson.com>
   3 Date: Thu, 28 Nov 2019 23:18:28 +0100
   4 Subject: [PATCH] [InstCombine] Fix big-endian miscompile of (bitcast
   5  (zext/trunc (bitcast)))
   6
   7 Summary:
   8 optimizeVectorResize is rewriting patterns like:
   9   %1 = bitcast vector %src to integer
  10   %2 = trunc/zext %1
  11   %dst = bitcast %2 to vector
  12
  13 Since bitcasting between integer and vector types gives
  14 different integer values depending on endianness, we need
  15 to take endianness into account. As it happens the old
  16 implementation only produced the correct result for little
  17 endian targets.
  18
  19 Fixes: https://bugs.llvm.org/show_bug.cgi?id=44178
  20
  21 Reviewers: spatel, lattner, lebedev.ri
  22
  23 Reviewed By: spatel, lebedev.ri
  24
  25 Subscribers: lebedev.ri, hiraditya, uabelho, llvm-commits
  26
  27 Tags: #llvm
  28
  29 Differential Revision: https://reviews.llvm.org/D70844
  30
  31 (cherry picked from commit a9d6b0e5444741d08ff1df7cf71d1559e7fefc1f)
  32 ---
  33  .../InstCombine/InstCombineCasts.cpp          | 79 +++++++++++++------
  34  llvm/test/Transforms/InstCombine/cast.ll      |  6 +-
  35  2 files changed, 60 insertions(+), 25 deletions(-)
  36
  37 diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
  38 index 2c9ba203fbf3..0af3de300e77 100644
  39 --- llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
  40 +++ llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
  41 @@ -18,6 +18,7 @@
  42  #include "llvm/IR/DIBuilder.h"
  43  #include "llvm/IR/PatternMatch.h"
  44  #include "llvm/Support/KnownBits.h"
  45 +#include <numeric>
  46  using namespace llvm;
  47  using namespace PatternMatch;
  48
  49 @@ -1820,12 +1821,24 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
  50  }
  51
  52  /// This input value (which is known to have vector type) is being zero extended
  53 -/// or truncated to the specified vector type.
  54 +/// or truncated to the specified vector type. Since the zext/trunc is done
  55 +/// using an integer type, we have a (bitcast(cast(bitcast))) pattern, and
  56 +/// endianness will impact which end of the vector is extended or
  57 +/// truncated.
  58 +///
  59 +/// A vector is always stored with index 0 at the lowest address, which
  60 +/// corresponds to the most significant bits for a big endian stored integer and
  61 +/// the least significant bits for little endian. A trunc/zext of an integer
  62 +/// impacts the big end of the integer. Thus, we need to add/remove elements at
  63 +/// the front of the vector for big endian targets, and the back of the vector
  64 +/// for little endian targets.
  65 +///
  66  /// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
  67  ///
  68  /// The source and destination vector types may have different element types.
  69 -static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy,
  70 -                                         InstCombiner &IC) {
  71 +static Instruction *optimizeVectorResizeWithIntegerBitCasts(Value *InVal,
  72 +                                                            VectorType *DestTy,
  73 +                                                            InstCombiner &IC) {
  74    // We can only do this optimization if the output is a multiple of the input
  75    // element size, or the input is a multiple of the output element size.
  76    // Convert the input type to have the same element type as the output.
  77 @@ -1844,31 +1857,53 @@ static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy,
  78      InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
  79    }
  80
  81 +  bool IsBigEndian = IC.getDataLayout().isBigEndian();
  82 +  unsigned SrcElts = SrcTy->getNumElements();
  83 +  unsigned DestElts = DestTy->getNumElements();
  84 +
  85 +  assert(SrcElts != DestElts && "Element counts should be different.");
  86 +
  87    // Now that the element types match, get the shuffle mask and RHS of the
  88    // shuffle to use, which depends on whether we're increasing or decreasing the
  89    // size of the input.
  90 -  SmallVector<uint32_t, 16> ShuffleMask;
  91 +  SmallVector<uint32_t, 16> ShuffleMaskStorage;
  92 +  ArrayRef<uint32_t> ShuffleMask;
  93    Value *V2;
  94
  95 -  if (SrcTy->getNumElements() > DestTy->getNumElements()) {
  96 -    // If we're shrinking the number of elements, just shuffle in the low
  97 -    // elements from the input and use undef as the second shuffle input.
  98 -    V2 = UndefValue::get(SrcTy);
  99 -    for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i)
 100 -      ShuffleMask.push_back(i);
 101 +  // Produce an identity shuffle mask for the src vector.
 102 +  ShuffleMaskStorage.resize(SrcElts);
 103 +  std::iota(ShuffleMaskStorage.begin(), ShuffleMaskStorage.end(), 0);
 104
 105 +  if (SrcElts > DestElts) {
 106 +    // If we're shrinking the number of elements (rewriting an integer
 107 +    // truncate), just shuffle in the elements corresponding to the least
 108 +    // significant bits from the input and use undef as the second shuffle
 109 +    // input.
 110 +    V2 = UndefValue::get(SrcTy);
 111 +    // Make sure the shuffle mask selects the "least significant bits" by
 112 +    // keeping elements from back of the src vector for big endian, and from the
 113 +    // front for little endian.
 114 +    ShuffleMask = ShuffleMaskStorage;
 115 +    if (IsBigEndian)
 116 +      ShuffleMask = ShuffleMask.take_back(DestElts);
 117 +    else
 118 +      ShuffleMask = ShuffleMask.take_front(DestElts);
 119    } else {
 120 -    // If we're increasing the number of elements, shuffle in all of the
 121 -    // elements from InVal and fill the rest of the result elements with zeros
 122 -    // from a constant zero.
 123 +    // If we're increasing the number of elements (rewriting an integer zext),
 124 +    // shuffle in all of the elements from InVal. Fill the rest of the result
 125 +    // elements with zeros from a constant zero.
 126      V2 = Constant::getNullValue(SrcTy);
 127 -    unsigned SrcElts = SrcTy->getNumElements();
 128 -    for (unsigned i = 0, e = SrcElts; i != e; ++i)
 129 -      ShuffleMask.push_back(i);
 130 -
 131 -    // The excess elements reference the first element of the zero input.
 132 -    for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i)
 133 -      ShuffleMask.push_back(SrcElts);
 134 +    // Use first elt from V2 when indicating zero in the shuffle mask.
 135 +    uint32_t NullElt = SrcElts;
 136 +    // Extend with null values in the "most significant bits" by adding elements
 137 +    // in front of the src vector for big endian, and at the back for little
 138 +    // endian.
 139 +    unsigned DeltaElts = DestElts - SrcElts;
 140 +    if (IsBigEndian)
 141 +      ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
 142 +    else
 143 +      ShuffleMaskStorage.append(DeltaElts, NullElt);
 144 +    ShuffleMask = ShuffleMaskStorage;
 145    }
 146
 147    return new ShuffleVectorInst(InVal, V2,
 148 @@ -2359,8 +2394,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
 149          CastInst *SrcCast = cast<CastInst>(Src);
 150          if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
 151            if (isa<VectorType>(BCIn->getOperand(0)->getType()))
 152 -            if (Instruction *I = optimizeVectorResize(BCIn->getOperand(0),
 153 -                                               cast<VectorType>(DestTy), *this))
 154 +            if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts(
 155 +                    BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
 156                return I;
 157        }
 158
 159 diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll
 160 index b6d1eda0601d..3ce8de033422 100644
 161 --- llvm/test/Transforms/InstCombine/cast.ll
 162 +++ llvm/test/Transforms/InstCombine/cast.ll
 163 @@ -824,7 +824,7 @@ define i64 @test59(i8 %A, i8 %B) {
 164
 165  define <3 x i32> @test60(<4 x i32> %call4) {
 166  ; CHECK-LABEL: @test60(
 167 -; CHECK-NEXT:    [[P10:%.*]] = shufflevector <4 x i32> [[CALL4:%.*]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
 168 +; CHECK-NEXT:    [[P10:%.*]] = shufflevector <4 x i32> [[CALL4:%.*]], <4 x i32> undef, <3 x i32> <i32 1, i32 2, i32 3>
 169  ; CHECK-NEXT:    ret <3 x i32> [[P10]]
 170  ;
 171    %p11 = bitcast <4 x i32> %call4 to i128
 172 @@ -836,7 +836,7 @@ define <3 x i32> @test60(<4 x i32> %call4) {
 173
 174  define <4 x i32> @test61(<3 x i32> %call4) {
 175  ; CHECK-LABEL: @test61(
 176 -; CHECK-NEXT:    [[P10:%.*]] = shufflevector <3 x i32> [[CALL4:%.*]], <3 x i32> <i32 0, i32 undef, i32 undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 177 +; CHECK-NEXT:    [[P10:%.*]] = shufflevector <3 x i32> [[CALL4:%.*]], <3 x i32> <i32 0, i32 undef, i32 undef>, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
 178  ; CHECK-NEXT:    ret <4 x i32> [[P10]]
 179  ;
 180    %p11 = bitcast <3 x i32> %call4 to i96
 181 @@ -848,7 +848,7 @@ define <4 x i32> @test61(<3 x i32> %call4) {
 182  define <4 x i32> @test62(<3 x float> %call4) {
 183  ; CHECK-LABEL: @test62(
 184  ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <3 x float> [[CALL4:%.*]] to <3 x i32>
 185 -; CHECK-NEXT:    [[P10:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> <i32 0, i32 undef, i32 undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 186 +; CHECK-NEXT:    [[P10:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> <i32 0, i32 undef, i32 undef>, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
 187  ; CHECK-NEXT:    ret <4 x i32> [[P10]]
 188  ;
 189    %p11 = bitcast <3 x float> %call4 to i96
 190 --
 191 2.26.2
 192