Test Failure: Transforms/SLPVectorizer/AMDGPU/horizontal-store.ll

Test source: git

Log:

Source: <stdin>
-- 1. ModuleToFunctionPassAdaptor
-- 1. PassManager<llvm::Function> : Skipping NOP
-- 2. SLPVectorizerPass

----------------------------------------
@arr = global 128 bytes, align 16
@var = global 4 bytes, align 8

define i32 @smaxv6() {
#0:
  %load1 = load i32, ptr @arr, align 16
  %__constexpr_0 = gep inbounds ptr @arr, 128 x i64 0, 4 x i64 1
  %load2 = load i32, ptr %__constexpr_0, align 4
  %cmp1 = icmp sgt i32 %load1, %load2
  %select1 = select i1 %cmp1, i32 %load1, i32 %load2
  %__constexpr_1 = gep inbounds ptr @arr, 128 x i64 0, 4 x i64 2
  %load3 = load i32, ptr %__constexpr_1, align 8
  %cmp2 = icmp sgt i32 %select1, %load3
  %select2 = select i1 %cmp2, i32 %select1, i32 %load3
  %__constexpr_2 = gep inbounds ptr @arr, 128 x i64 0, 4 x i64 3
  %load4 = load i32, ptr %__constexpr_2, align 4
  %cmp3 = icmp sgt i32 %select2, %load4
  %select3 = select i1 %cmp3, i32 %select2, i32 %load4
  %__constexpr_3 = gep inbounds ptr @arr, 128 x i64 0, 4 x i64 4
  %load5 = load i32, ptr %__constexpr_3, align 16
  %cmp4 = icmp sgt i32 %select3, %load5
  %select4 = select i1 %cmp4, i32 %select3, i32 %load5
  %__constexpr_4 = gep inbounds ptr @arr, 128 x i64 0, 4 x i64 5
  %load6 = load i32, ptr %__constexpr_4, align 4
  %cmp5 = icmp sgt i32 %select4, %load6
  %select5 = select i1 %cmp5, i32 %select4, i32 %load6
  %store-select = select i1 %cmp1, i32 3, i32 4
  store i32 %store-select, ptr @var, align 8
  ret i32 %select5
}
Transformation seems to be correct! (syntactically equal)

-- 3. SLPVectorizerPass

----------------------------------------
@arr = global 128 bytes, align 16
@var = global 4 bytes, align 8

define i32 @smaxv6() {
#0:
  %load1 = load i32, ptr @arr, align 16
  %__constexpr_0 = gep inbounds ptr @arr, 128 x i64 0, 4 x i64 1
  %load2 = load i32, ptr %__constexpr_0, align 4
  %cmp1 = icmp sgt i32 %load1, %load2
  %select1 = select i1 %cmp1, i32 %load1, i32 %load2
  %__constexpr_1 = gep inbounds ptr @arr, 128 x i64 0, 4 x i64 2
  %load3 = load i32, ptr %__constexpr_1, align 8
  %cmp2 = icmp sgt i32 %select1, %load3
  %select2 = select i1 %cmp2, i32 %select1, i32 %load3
  %__constexpr_2 = gep inbounds ptr @arr, 128 x i64 0, 4 x i64 3
  %load4 = load i32, ptr %__constexpr_2, align 4
  %cmp3 = icmp sgt i32 %select2, %load4
  %select3 = select i1 %cmp3, i32 %select2, i32 %load4
  %__constexpr_3 = gep inbounds ptr @arr, 128 x i64 0, 4 x i64 4
  %load5 = load i32, ptr %__constexpr_3, align 16
  %cmp4 = icmp sgt i32 %select3, %load5
  %select4 = select i1 %cmp4, i32 %select3, i32 %load5
  %__constexpr_4 = gep inbounds ptr @arr, 128 x i64 0, 4 x i64 5
  %load6 = load i32, ptr %__constexpr_4, align 4
  %cmp5 = icmp sgt i32 %select4, %load6
  %select5 = select i1 %cmp5, i32 %select4, i32 %load6
  %store-select = select i1 %cmp1, i32 3, i32 4
  store i32 %store-select, ptr @var, align 8
  ret i32 %select5
}
=>
@arr = global 128 bytes, align 16
@var = global 4 bytes, align 8

define i32 @smaxv6() {
#0:
  %#1 = load <2 x i32>, ptr @arr, align 16
  %#2 = extractelement <2 x i32> %#1, i32 0
  %#3 = extractelement <2 x i32> %#1, i32 1
  %cmp1 = icmp sgt i32 %#2, %#3
  %select1 = select i1 %cmp1, i32 %#2, i32 %#3
  %__constexpr_0 = gep inbounds ptr @arr, 128 x i64 0, 4 x i64 2
  %#4 = load <4 x i32>, ptr %__constexpr_0, align 8
  %#5 = reduce_smax <4 x i32> %#4
  %op.rdx = icmp sgt i32 %#5, %select1
  %op.rdx1 = select i1 %op.rdx, i32 %#5, i32 %select1
  %store-select = select i1 %cmp1, i32 3, i32 4
  store i32 %store-select, ptr @var, align 8
  ret i32 %op.rdx1
}
Transformation seems to be correct!

-- 4. DCEPass

----------------------------------------
@arr = global 128 bytes, align 16
@var = global 4 bytes, align 8

define i32 @smaxv6() {
#0:
  %#1 = load <2 x i32>, ptr @arr, align 16
  %#2 = extractelement <2 x i32> %#1, i32 0
  %#3 = extractelement <2 x i32> %#1, i32 1
  %cmp1 = icmp sgt i32 %#2, %#3
  %select1 = select i1 %cmp1, i32 %#2, i32 %#3
  %__constexpr_0 = gep inbounds ptr @arr, 128 x i64 0, 4 x i64 2
  %#4 = load <4 x i32>, ptr %__constexpr_0, align 8
  %#5 = reduce_smax <4 x i32> %#4
  %op.rdx = icmp sgt i32 %#5, %select1
  %op.rdx1 = select i1 %op.rdx, i32 %#5, i32 %select1
  %store-select = select i1 %cmp1, i32 3, i32 4
  store i32 %store-select, ptr @var, align 8
  ret i32 %op.rdx1
}
Transformation seems to be correct! (syntactically equal)

-- 5. DCEPass

----------------------------------------
@arr = global 128 bytes, align 16
@var = global 4 bytes, align 8

define i32 @smaxv6() {
#0:
  %#1 = load <2 x i32>, ptr @arr, align 16
  %#2 = extractelement <2 x i32> %#1, i32 0
  %#3 = extractelement <2 x i32> %#1, i32 1
  %cmp1 = icmp sgt i32 %#2, %#3
  %select1 = select i1 %cmp1, i32 %#2, i32 %#3
  %__constexpr_0 = gep inbounds ptr @arr, 128 x i64 0, 4 x i64 2
  %#4 = load <4 x i32>, ptr %__constexpr_0, align 8
  %#5 = reduce_smax <4 x i32> %#4
  %op.rdx = icmp sgt i32 %#5, %select1
  %op.rdx1 = select i1 %op.rdx, i32 %#5, i32 %select1
  %store-select = select i1 %cmp1, i32 3, i32 4
  store i32 %store-select, ptr @var, align 8
  ret i32 %op.rdx1
}
Transformation seems to be correct! (syntactically equal)

-- 6. PassManager<llvm::Function> : Skipping NOP
-- 7. PassManager<llvm::Function> : Skipping NOP
-- 8. SLPVectorizerPass

----------------------------------------
@arr64 = global 256 bytes, align 16
@var64 = global 8 bytes, align 8

define i64 @sminv6() {
#0:
  %load1 = load i64, ptr @arr64, align 16
  %__constexpr_0 = gep inbounds ptr @arr64, 256 x i64 0, 8 x i64 1
  %load2 = load i64, ptr %__constexpr_0, align 8
  %cmp1 = icmp slt i64 %load1, %load2
  %select1 = select i1 %cmp1, i64 %load1, i64 %load2
  %__constexpr_1 = gep inbounds ptr @arr64, 256 x i64 0, 8 x i64 2
  %load3 = load i64, ptr %__constexpr_1, align 16
  %cmp2 = icmp slt i64 %select1, %load3
  %select2 = select i1 %cmp2, i64 %select1, i64 %load3
  %__constexpr_2 = gep inbounds ptr @arr64, 256 x i64 0, 8 x i64 3
  %load4 = load i64, ptr %__constexpr_2, align 8
  %cmp3 = icmp slt i64 %select2, %load4
  %select3 = select i1 %cmp3, i64 %select2, i64 %load4
  %__constexpr_3 = gep inbounds ptr @arr64, 256 x i64 0, 8 x i64 4
  %load5 = load i64, ptr %__constexpr_3, align 16
  %cmp4 = icmp slt i64 %select3, %load5
  %select4 = select i1 %cmp4, i64 %select3, i64 %load5
  %__constexpr_4 = gep inbounds ptr @arr64, 256 x i64 0, 8 x i64 5
  %load6 = load i64, ptr %__constexpr_4, align 8
  %cmp5 = icmp slt i64 %select4, %load6
  %select5 = select i1 %cmp5, i64 %select4, i64 %load6
  %store-select = select i1 %cmp1, i64 3, i64 4
  store i64 %store-select, ptr @var64, align 8
  ret i64 %select5
}
Transformation seems to be correct! (syntactically equal)

-- 9. SLPVectorizerPass

----------------------------------------
@arr64 = global 256 bytes, align 16
@var64 = global 8 bytes, align 8

define i64 @sminv6() {
#0:
  %load1 = load i64, ptr @arr64, align 16
  %__constexpr_0 = gep inbounds ptr @arr64, 256 x i64 0, 8 x i64 1
  %load2 = load i64, ptr %__constexpr_0, align 8
  %cmp1 = icmp slt i64 %load1, %load2
  %select1 = select i1 %cmp1, i64 %load1, i64 %load2
  %__constexpr_1 = gep inbounds ptr @arr64, 256 x i64 0, 8 x i64 2
  %load3 = load i64, ptr %__constexpr_1, align 16
  %cmp2 = icmp slt i64 %select1, %load3
  %select2 = select i1 %cmp2, i64 %select1, i64 %load3
  %__constexpr_2 = gep inbounds ptr @arr64, 256 x i64 0, 8 x i64 3
  %load4 = load i64, ptr %__constexpr_2, align 8
  %cmp3 = icmp slt i64 %select2, %load4
  %select3 = select i1 %cmp3, i64 %select2, i64 %load4
  %__constexpr_3 = gep inbounds ptr @arr64, 256 x i64 0, 8 x i64 4
  %load5 = load i64, ptr %__constexpr_3, align 16
  %cmp4 = icmp slt i64 %select3, %load5
  %select4 = select i1 %cmp4, i64 %select3, i64 %load5
  %__constexpr_4 = gep inbounds ptr @arr64, 256 x i64 0, 8 x i64 5
  %load6 = load i64, ptr %__constexpr_4, align 8
  %cmp5 = icmp slt i64 %select4, %load6
  %select5 = select i1 %cmp5, i64 %select4, i64 %load6
  %store-select = select i1 %cmp1, i64 3, i64 4
  store i64 %store-select, ptr @var64, align 8
  ret i64 %select5
}
=>
@arr64 = global 256 bytes, align 16
@var64 = global 8 bytes, align 8

define i64 @sminv6() {
#0:
  %#1 = load <2 x i64>, ptr @arr64, align 16
  %#2 = extractelement <2 x i64> %#1, i32 0
  %#3 = extractelement <2 x i64> %#1, i32 1
  %cmp1 = icmp slt i64 %#2, %#3
  %select1 = select i1 %cmp1, i64 %#2, i64 %#3
  %__constexpr_0 = gep inbounds ptr @arr64, 256 x i64 0, 8 x i64 2
  %#4 = load <4 x i64>, ptr %__constexpr_0, align 16
  %#5 = reduce_smin <4 x i64> %#4
  %op.rdx = icmp slt i64 %#5, %select1
  %op.rdx1 = select i1 %op.rdx, i64 %#5, i64 %select1
  %store-select = select i1 %cmp1, i64 3, i64 4
  store i64 %store-select, ptr @var64, align 8
  ret i64 %op.rdx1
}
Transformation seems to be correct!

-- 10. DCEPass

----------------------------------------
@arr64 = global 256 bytes, align 16
@var64 = global 8 bytes, align 8

define i64 @sminv6() {
#0:
  %#1 = load <2 x i64>, ptr @arr64, align 16
  %#2 = extractelement <2 x i64> %#1, i32 0
  %#3 = extractelement <2 x i64> %#1, i32 1
  %cmp1 = icmp slt i64 %#2, %#3
  %select1 = select i1 %cmp1, i64 %#2, i64 %#3
  %__constexpr_0 = gep inbounds ptr @arr64, 256 x i64 0, 8 x i64 2
  %#4 = load <4 x i64>, ptr %__constexpr_0, align 16
  %#5 = reduce_smin <4 x i64> %#4
  %op.rdx = icmp slt i64 %#5, %select1
  %op.rdx1 = select i1 %op.rdx, i64 %#5, i64 %select1
  %store-select = select i1 %cmp1, i64 3, i64 4
  store i64 %store-select, ptr @var64, align 8
  ret i64 %op.rdx1
}
Transformation seems to be correct! (syntactically equal)

-- 11. DCEPass

----------------------------------------
@arr64 = global 256 bytes, align 16
@var64 = global 8 bytes, align 8

define i64 @sminv6() {
#0:
  %#1 = load <2 x i64>, ptr @arr64, align 16
  %#2 = extractelement <2 x i64> %#1, i32 0
  %#3 = extractelement <2 x i64> %#1, i32 1
  %cmp1 = icmp slt i64 %#2, %#3
  %select1 = select i1 %cmp1, i64 %#2, i64 %#3
  %__constexpr_0 = gep inbounds ptr @arr64, 256 x i64 0, 8 x i64 2
  %#4 = load <4 x i64>, ptr %__constexpr_0, align 16
  %#5 = reduce_smin <4 x i64> %#4
  %op.rdx = icmp slt i64 %#5, %select1
  %op.rdx1 = select i1 %op.rdx, i64 %#5, i64 %select1
  %store-select = select i1 %cmp1, i64 3, i64 4
  store i64 %store-select, ptr @var64, align 8
  ret i64 %op.rdx1
}
Transformation seems to be correct! (syntactically equal)

-- 12. PassManager<llvm::Function> : Skipping NOP
-- 13. PassManager<llvm::Function> : Skipping NOP
-- 14. SLPVectorizerPass

----------------------------------------
@farr = global 128 bytes, align 16
@fvar = global 4 bytes, align 8

define float @fmaxv6() {
#0:
  %load1 = load float, ptr @farr, align 16
  %__constexpr_0 = gep inbounds ptr @farr, 128 x i64 0, 4 x i64 1
  %load2 = load float, ptr %__constexpr_0, align 4
  %cmp1 = fcmp fast ogt float %load1, %load2
  %select1 = select i1 %cmp1, float %load1, float %load2
  %__constexpr_1 = gep inbounds ptr @farr, 128 x i64 0, 4 x i64 2
  %load3 = load float, ptr %__constexpr_1, align 8
  %cmp2 = fcmp fast ogt float %select1, %load3
  %select2 = select i1 %cmp2, float %select1, float %load3
  %__constexpr_2 = gep inbounds ptr @farr, 128 x i64 0, 4 x i64 3
  %load4 = load float, ptr %__constexpr_2, align 4
  %cmp3 = fcmp fast ogt float %select2, %load4
  %select3 = select i1 %cmp3, float %select2, float %load4
  %__constexpr_3 = gep inbounds ptr @farr, 128 x i64 0, 4 x i64 4
  %load5 = load float, ptr %__constexpr_3, align 16
  %cmp4 = fcmp fast ogt float %select3, %load5
  %select4 = select i1 %cmp4, float %select3, float %load5
  %__constexpr_4 = gep inbounds ptr @farr, 128 x i64 0, 4 x i64 5
  %load6 = load float, ptr %__constexpr_4, align 4
  %cmp5 = fcmp fast ogt float %select4, %load6
  %select5 = select i1 %cmp5, float %select4, float %load6
  %store-select = select i1 %cmp1, float 3.000000, float 4.000000
  store float %store-select, ptr @fvar, align 8
  ret float %select5
}
Transformation seems to be correct! (syntactically equal)

-- 15. SLPVectorizerPass

----------------------------------------
@farr = global 128 bytes, align 16
@fvar = global 4 bytes, align 8

define float @fmaxv6() {
#0:
  %load1 = load float, ptr @farr, align 16
  %__constexpr_0 = gep inbounds ptr @farr, 128 x i64 0, 4 x i64 1
  %load2 = load float, ptr %__constexpr_0, align 4
  %cmp1 = fcmp fast ogt float %load1, %load2
  %select1 = select i1 %cmp1, float %load1, float %load2
  %__constexpr_1 = gep inbounds ptr @farr, 128 x i64 0, 4 x i64 2
  %load3 = load float, ptr %__constexpr_1, align 8
  %cmp2 = fcmp fast ogt float %select1, %load3
  %select2 = select i1 %cmp2, float %select1, float %load3
  %__constexpr_2 = gep inbounds ptr @farr, 128 x i64 0, 4 x i64 3
  %load4 = load float, ptr %__constexpr_2, align 4
  %cmp3 = fcmp fast ogt float %select2, %load4
  %select3 = select i1 %cmp3, float %select2, float %load4
  %__constexpr_3 = gep inbounds ptr @farr, 128 x i64 0, 4 x i64 4
  %load5 = load float, ptr %__constexpr_3, align 16
  %cmp4 = fcmp fast ogt float %select3, %load5
  %select4 = select i1 %cmp4, float %select3, float %load5
  %__constexpr_4 = gep inbounds ptr @farr, 128 x i64 0, 4 x i64 5
  %load6 = load float, ptr %__constexpr_4, align 4
  %cmp5 = fcmp fast ogt float %select4, %load6
  %select5 = select i1 %cmp5, float %select4, float %load6
  %store-select = select i1 %cmp1, float 3.000000, float 4.000000
  store float %store-select, ptr @fvar, align 8
  ret float %select5
}
=>
@farr = global 128 bytes, align 16
@fvar = global 4 bytes, align 8

define float @fmaxv6() {
#0:
  %#1 = load <2 x float>, ptr @farr, align 16
  %#2 = extractelement <2 x float> %#1, i32 0
  %#3 = extractelement <2 x float> %#1, i32 1
  %cmp1 = fcmp fast ogt float %#2, %#3
  %select1 = select i1 %cmp1, float %#2, float %#3
  %__constexpr_0 = gep inbounds ptr @farr, 128 x i64 0, 4 x i64 2
  %load3 = load float, ptr %__constexpr_0, align 8
  %cmp2 = fcmp fast ogt float %select1, %load3
  %select2 = select i1 %cmp2, float %select1, float %load3
  %__constexpr_1 = gep inbounds ptr @farr, 128 x i64 0, 4 x i64 3
  %load4 = load float, ptr %__constexpr_1, align 4
  %cmp3 = fcmp fast ogt float %select2, %load4
  %select3 = select i1 %cmp3, float %select2, float %load4
  %__constexpr_2 = gep inbounds ptr @farr, 128 x i64 0, 4 x i64 4
  %load5 = load float, ptr %__constexpr_2, align 16
  %cmp4 = fcmp fast ogt float %select3, %load5
  %select4 = select i1 %cmp4, float %select3, float %load5
  %__constexpr_3 = gep inbounds ptr @farr, 128 x i64 0, 4 x i64 5
  %load6 = load float, ptr %__constexpr_3, align 4
  %cmp5 = fcmp fast ogt float %select4, %load6
  %select5 = select i1 %cmp5, float %select4, float %load6
  %store-select = select i1 %cmp1, float 3.000000, float 4.000000
  store float %store-select, ptr @fvar, align 8
  ret float %select5
}
Transformation seems to be correct!

-- 16. DCEPass

----------------------------------------
@farr = global 128 bytes, align 16
@fvar = global 4 bytes, align 8

define float @fmaxv6() {
#0:
  %#1 = load <2 x float>, ptr @farr, align 16
  %#2 = extractelement <2 x float> %#1, i32 0
  %#3 = extractelement <2 x float> %#1, i32 1
  %cmp1 = fcmp fast ogt float %#2, %#3
  %select1 = select i1 %cmp1, float %#2, float %#3
  %__constexpr_0 = gep inbounds ptr @farr, 128 x i64 0, 4 x i64 2
  %load3 = load float, ptr %__constexpr_0, align 8
  %cmp2 = fcmp fast ogt float %select1, %load3
  %select2 = select i1 %cmp2, float %select1, float %load3
  %__constexpr_1 = gep inbounds ptr @farr, 128 x i64 0, 4 x i64 3
  %load4 = load float, ptr %__constexpr_1, align 4
  %cmp3 = fcmp fast ogt float %select2, %load4
  %select3 = select i1 %cmp3, float %select2, float %load4
  %__constexpr_2 = gep inbounds ptr @farr, 128 x i64 0, 4 x i64 4
  %load5 = load float, ptr %__constexpr_2, align 16
  %cmp4 = fcmp fast ogt float %select3, %load5
  %select4 = select i1 %cmp4, float %select3, float %load5
  %__constexpr_3 = gep inbounds ptr @farr, 128 x i64 0, 4 x i64 5
  %load6 = load float, ptr %__constexpr_3, align 4
  %cmp5 = fcmp fast ogt float %select4, %load6
  %select5 = select i1 %cmp5, float %select4, float %load6
  %store-select = select i1 %cmp1, float 3.000000, float 4.000000
  store float %store-select, ptr @fvar, align 8
  ret float %select5
}
Transformation seems to be correct! (syntactically equal)

-- 17. DCEPass

----------------------------------------
@farr = global 128 bytes, align 16
@fvar = global 4 bytes, align 8

define float @fmaxv6() {
#0:
  %#1 = load <2 x float>, ptr @farr, align 16
  %#2 = extractelement <2 x float> %#1, i32 0
  %#3 = extractelement <2 x float> %#1, i32 1
  %cmp1 = fcmp fast ogt float %#2, %#3
  %select1 = select i1 %cmp1, float %#2, float %#3
  %__constexpr_0 = gep inbounds ptr @farr, 128 x i64 0, 4 x i64 2
  %load3 = load float, ptr %__constexpr_0, align 8
  %cmp2 = fcmp fast ogt float %select1, %load3
  %select2 = select i1 %cmp2, float %select1, float %load3
  %__constexpr_1 = gep inbounds ptr @farr, 128 x i64 0, 4 x i64 3
  %load4 = load float, ptr %__constexpr_1, align 4
  %cmp3 = fcmp fast ogt float %select2, %load4
  %select3 = select i1 %cmp3, float %select2, float %load4
  %__constexpr_2 = gep inbounds ptr @farr, 128 x i64 0, 4 x i64 4
  %load5 = load float, ptr %__constexpr_2, align 16
  %cmp4 = fcmp fast ogt float %select3, %load5
  %select4 = select i1 %cmp4, float %select3, float %load5
  %__constexpr_3 = gep inbounds ptr @farr, 128 x i64 0, 4 x i64 5
  %load6 = load float, ptr %__constexpr_3, align 4
  %cmp5 = fcmp fast ogt float %select4, %load6
  %select5 = select i1 %cmp5, float %select4, float %load6
  %store-select = select i1 %cmp1, float 3.000000, float 4.000000
  store float %store-select, ptr @fvar, align 8
  ret float %select5
}
Transformation seems to be correct! (syntactically equal)

-- 18. PassManager<llvm::Function> : Skipping NOP
-- 19. PassManager<llvm::Function> : Skipping NOP
-- 20. SLPVectorizerPass

----------------------------------------
@darr = global 256 bytes, align 16
@dvar = global 8 bytes, align 8

define double @dminv6() {
#0:
  %load1 = load double, ptr @darr, align 16
  %__constexpr_0 = gep inbounds ptr @darr, 256 x i64 0, 8 x i64 1
  %load2 = load double, ptr %__constexpr_0, align 4
  %cmp1 = fcmp fast olt double %load1, %load2
  %select1 = select i1 %cmp1, double %load1, double %load2
  %__constexpr_1 = gep inbounds ptr @darr, 256 x i64 0, 8 x i64 2
  %load3 = load double, ptr %__constexpr_1, align 8
  %cmp2 = fcmp fast olt double %select1, %load3
  %select2 = select i1 %cmp2, double %select1, double %load3
  %__constexpr_2 = gep inbounds ptr @darr, 256 x i64 0, 8 x i64 3
  %load4 = load double, ptr %__constexpr_2, align 4
  %cmp3 = fcmp fast olt double %select2, %load4
  %select3 = select i1 %cmp3, double %select2, double %load4
  %__constexpr_3 = gep inbounds ptr @darr, 256 x i64 0, 8 x i64 4
  %load5 = load double, ptr %__constexpr_3, align 16
  %cmp4 = fcmp fast olt double %select3, %load5
  %select4 = select i1 %cmp4, double %select3, double %load5
  %__constexpr_4 = gep inbounds ptr @darr, 256 x i64 0, 8 x i64 5
  %load6 = load double, ptr %__constexpr_4, align 4
  %cmp5 = fcmp fast olt double %select4, %load6
  %select5 = select i1 %cmp5, double %select4, double %load6
  %store-select = select i1 %cmp1, double 3.000000, double 4.000000
  store double %store-select, ptr @dvar, align 8
  ret double %select5
}
Transformation seems to be correct! (syntactically equal)

-- 21. SLPVectorizerPass

----------------------------------------
@darr = global 256 bytes, align 16
@dvar = global 8 bytes, align 8

define double @dminv6() {
#0:
  %load1 = load double, ptr @darr, align 16
  %__constexpr_0 = gep inbounds ptr @darr, 256 x i64 0, 8 x i64 1
  %load2 = load double, ptr %__constexpr_0, align 4
  %cmp1 = fcmp fast olt double %load1, %load2
  %select1 = select i1 %cmp1, double %load1, double %load2
  %__constexpr_1 = gep inbounds ptr @darr, 256 x i64 0, 8 x i64 2
  %load3 = load double, ptr %__constexpr_1, align 8
  %cmp2 = fcmp fast olt double %select1, %load3
  %select2 = select i1 %cmp2, double %select1, double %load3
  %__constexpr_2 = gep inbounds ptr @darr, 256 x i64 0, 8 x i64 3
  %load4 = load double, ptr %__constexpr_2, align 4
  %cmp3 = fcmp fast olt double %select2, %load4
  %select3 = select i1 %cmp3, double %select2, double %load4
  %__constexpr_3 = gep inbounds ptr @darr, 256 x i64 0, 8 x i64 4
  %load5 = load double, ptr %__constexpr_3, align 16
  %cmp4 = fcmp fast olt double %select3, %load5
  %select4 = select i1 %cmp4, double %select3, double %load5
  %__constexpr_4 = gep inbounds ptr @darr, 256 x i64 0, 8 x i64 5
  %load6 = load double, ptr %__constexpr_4, align 4
  %cmp5 = fcmp fast olt double %select4, %load6
  %select5 = select i1 %cmp5, double %select4, double %load6
  %store-select = select i1 %cmp1, double 3.000000, double 4.000000
  store double %store-select, ptr @dvar, align 8
  ret double %select5
}
=>
@darr = global 256 bytes, align 16
@dvar = global 8 bytes, align 8

define double @dminv6() {
#0:
  %#1 = load <2 x double>, ptr @darr, align 16
  %#2 = extractelement <2 x double> %#1, i32 0
  %#3 = extractelement <2 x double> %#1, i32 1
  %cmp1 = fcmp fast olt double %#2, %#3
  %select1 = select i1 %cmp1, double %#2, double %#3
  %__constexpr_0 = gep inbounds ptr @darr, 256 x i64 0, 8 x i64 2
  %load3 = load double, ptr %__constexpr_0, align 8
  %cmp2 = fcmp fast olt double %select1, %load3
  %select2 = select i1 %cmp2, double %select1, double %load3
  %__constexpr_1 = gep inbounds ptr @darr, 256 x i64 0, 8 x i64 3
  %load4 = load double, ptr %__constexpr_1, align 4
  %cmp3 = fcmp fast olt double %select2, %load4
  %select3 = select i1 %cmp3, double %select2, double %load4
  %__constexpr_2 = gep inbounds ptr @darr, 256 x i64 0, 8 x i64 4
  %load5 = load double, ptr %__constexpr_2, align 16
  %cmp4 = fcmp fast olt double %select3, %load5
  %select4 = select i1 %cmp4, double %select3, double %load5
  %__constexpr_3 = gep inbounds ptr @darr, 256 x i64 0, 8 x i64 5
  %load6 = load double, ptr %__constexpr_3, align 4
  %cmp5 = fcmp fast olt double %select4, %load6
  %select5 = select i1 %cmp5, double %select4, double %load6
  %store-select = select i1 %cmp1, double 3.000000, double 4.000000
  store double %store-select, ptr @dvar, align 8
  ret double %select5
}
Transformation doesn't verify! (not unsound)
ERROR: Timeout
-- 22. DCEPass

----------------------------------------
@darr = global 256 bytes, align 16
@dvar = global 8 bytes, align 8

define double @dminv6() {
#0:
  %#1 = load <2 x double>, ptr @darr, align 16
  %#2 = extractelement <2 x double> %#1, i32 0
  %#3 = extractelement <2 x double> %#1, i32 1
  %cmp1 = fcmp fast olt double %#2, %#3
  %select1 = select i1 %cmp1, double %#2, double %#3
  %__constexpr_0 = gep inbounds ptr @darr, 256 x i64 0, 8 x i64 2
  %load3 = load double, ptr %__constexpr_0, align 8
  %cmp2 = fcmp fast olt double %select1, %load3
  %select2 = select i1 %cmp2, double %select1, double %load3
  %__constexpr_1 = gep inbounds ptr @darr, 256 x i64 0, 8 x i64 3
  %load4 = load double, ptr %__constexpr_1, align 4
  %cmp3 = fcmp fast olt double %select2, %load4
  %select3 = select i1 %cmp3, double %select2, double %load4
  %__constexpr_2 = gep inbounds ptr @darr, 256 x i64 0, 8 x i64 4
  %load5 = load double, ptr %__constexpr_2, align 16
  %cmp4 = fcmp fast olt double %select3, %load5
  %select4 = select i1 %cmp4, double %select3, double %load5
  %__constexpr_3 = gep inbounds ptr @darr, 256 x i64 0, 8 x i64 5
  %load6 = load double, ptr %__constexpr_3, align 4
  %cmp5 = fcmp fast olt double %select4, %load6
  %select5 = select i1 %cmp5, double %select4, double %load6
  %store-select = select i1 %cmp1, double 3.000000, double 4.000000
  store double %store-select, ptr @dvar, align 8
  ret double %select5
}
Transformation seems to be correct! (syntactically equal)

-- 23. DCEPass

----------------------------------------
@darr = global 256 bytes, align 16
@dvar = global 8 bytes, align 8

define double @dminv6() {
#0:
  %#1 = load <2 x double>, ptr @darr, align 16
  %#2 = extractelement <2 x double> %#1, i32 0
  %#3 = extractelement <2 x double> %#1, i32 1
  %cmp1 = fcmp fast olt double %#2, %#3
  %select1 = select i1 %cmp1, double %#2, double %#3
  %__constexpr_0 = gep inbounds ptr @darr, 256 x i64 0, 8 x i64 2
  %load3 = load double, ptr %__constexpr_0, align 8
  %cmp2 = fcmp fast olt double %select1, %load3
  %select2 = select i1 %cmp2, double %select1, double %load3
  %__constexpr_1 = gep inbounds ptr @darr, 256 x i64 0, 8 x i64 3
  %load4 = load double, ptr %__constexpr_1, align 4
  %cmp3 = fcmp fast olt double %select2, %load4
  %select3 = select i1 %cmp3, double %select2, double %load4
  %__constexpr_2 = gep inbounds ptr @darr, 256 x i64 0, 8 x i64 4
  %load5 = load double, ptr %__constexpr_2, align 16
  %cmp4 = fcmp fast olt double %select3, %load5
  %select4 = select i1 %cmp4, double %select3, double %load5
  %__constexpr_3 = gep inbounds ptr @darr, 256 x i64 0, 8 x i64 5
  %load6 = load double, ptr %__constexpr_3, align 4
  %cmp5 = fcmp fast olt double %select4, %load6
  %select5 = select i1 %cmp5, double %select4, double %load6
  %store-select = select i1 %cmp1, double 3.000000, double 4.000000
  store double %store-select, ptr @dvar, align 8
  ret double %select5
}
Transformation seems to be correct! (syntactically equal)

-- 24. PassManager<llvm::Function> : Skipping NOP
-- 25. PassManager<llvm::Function> : Skipping NOP
-- 26. SLPVectorizerPass

----------------------------------------
@arr = global 128 bytes, align 16
@var = global 4 bytes, align 8

define i32 @smax_wdiff_valuenum(i32 %#0, i32 %v1) {
#1:
  %vload = load <2 x i32>, ptr @arr, align 16
  %elt1 = extractelement <2 x i32> %vload, i32 0
  %cmp1 = icmp sgt i32 %elt1, %v1
  %ex0 = extractelement <2 x i32> %vload, i32 0
  %select1 = select i1 %cmp1, i32 %ex0, i32 %v1
  %__constexpr_0 = gep inbounds ptr @arr, 128 x i64 0, 4 x i64 2
  %load3 = load i32, ptr %__constexpr_0, align 8
  %cmp2 = icmp sgt i32 %select1, %load3
  %select2 = select i1 %cmp2, i32 %select1, i32 %load3
  %__constexpr_1 = gep inbounds ptr @arr, 128 x i64 0, 4 x i64 3
  %load4 = load i32, ptr %__constexpr_1, align 4
  %cmp3 = icmp sgt i32 %select2, %load4
  %select3 = select i1 %cmp3, i32 %select2, i32 %load4
  %__constexpr_2 = gep inbounds ptr @arr, 128 x i64 0, 4 x i64 4
  %load5 = load i32, ptr %__constexpr_2, align 16
  %cmp4 = icmp sgt i32 %select3, %load5
  %select4 = select i1 %cmp4, i32 %select3, i32 %load5
  %__constexpr_3 = gep inbounds ptr @arr, 128 x i64 0, 4 x i64 5
  %load6 = load i32, ptr %__constexpr_3, align 4
  %cmp5 = icmp sgt i32 %select4, %load6
  %select5 = select i1 %cmp5, i32 %select4, i32 %load6
  %storeval = select i1 %cmp1, i32 3, i32 4
  store i32 %storeval, ptr @var, align 8
  ret i32 %select5
}
Transformation seems to be correct! (syntactically equal)

-- 27. SLPVectorizerPass

----------------------------------------
@arr = global 128 bytes, align 16
@var = global 4 bytes, align 8

define i32 @smax_wdiff_valuenum(i32 %#0, i32 %v1) {
#1:
  %vload = load <2 x i32>, ptr @arr, align 16
  %elt1 = extractelement <2 x i32> %vload, i32 0
  %cmp1 = icmp sgt i32 %elt1, %v1
  %ex0 = extractelement <2 x i32> %vload, i32 0
  %select1 = select i1 %cmp1, i32 %ex0, i32 %v1
  %__constexpr_0 = gep inbounds ptr @arr, 128 x i64 0, 4 x i64 2
  %load3 = load i32, ptr %__constexpr_0, align 8
  %cmp2 = icmp sgt i32 %select1, %load3
  %select2 = select i1 %cmp2, i32 %select1, i32 %load3
  %__constexpr_1 = gep inbounds ptr @arr, 128 x i64 0, 4 x i64 3
  %load4 = load i32, ptr %__constexpr_1, align 4
  %cmp3 = icmp sgt i32 %select2, %load4
  %select3 = select i1 %cmp3, i32 %select2, i32 %load4
  %__constexpr_2 = gep inbounds ptr @arr, 128 x i64 0, 4 x i64 4
  %load5 = load i32, ptr %__constexpr_2, align 16
  %cmp4 = icmp sgt i32 %select3, %load5
  %select4 = select i1 %cmp4, i32 %select3, i32 %load5
  %__constexpr_3 = gep inbounds ptr @arr, 128 x i64 0, 4 x i64 5
  %load6 = load i32, ptr %__constexpr_3, align 4
  %cmp5 = icmp sgt i32 %select4, %load6
  %select5 = select i1 %cmp5, i32 %select4, i32 %load6
  %storeval = select i1 %cmp1, i32 3, i32 4
  store i32 %storeval, ptr @var, align 8
  ret i32 %select5
}
=>
@arr = global 128 bytes, align 16
@var = global 4 bytes, align 8

define i32 @smax_wdiff_valuenum(i32 %#0, i32 %v1) {
#1:
  %vload = load <2 x i32>, ptr @arr, align 16
  %elt1 = extractelement <2 x i32> %vload, i32 0
  %cmp1 = icmp sgt i32 %elt1, %v1
  %ex0 = extractelement <2 x i32> %vload, i32 0
  %select1 = select i1 %cmp1, i32 %ex0, i32 %v1
  %__constexpr_0 = gep inbounds ptr @arr, 128 x i64 0, 4 x i64 2
  %#2 = load <4 x i32>, ptr %__constexpr_0, align 8
  %#3 = reduce_smax <4 x i32> %#2
  %op.rdx = icmp sgt i32 %#3, %select1
  %op.rdx1 = select i1 %op.rdx, i32 %#3, i32 %select1
  %storeval = select i1 %cmp1, i32 3, i32 4
  store i32 %storeval, ptr @var, align 8
  ret i32 %op.rdx1
}
Transformation doesn't verify! (unsound)
ERROR: Target's return value is more undefined

Example:
i32 %#0 = poison
i32 %v1 = undef

Source:
<2 x i32> %vload = < #xc0000000 (3221225472, -1073741824), #x00000000 (0) >
i32 %elt1 = #xc0000000 (3221225472, -1073741824)
i1 %cmp1 = #x0 (0)	[based on undef]
i32 %ex0 = #xc0000000 (3221225472, -1073741824)
i32 %select1 = #x00000003 (3)	[based on undef]
ptr %__constexpr_0 = pointer(non-local, block_id=0, offset=8) / Address=#x808
i32 %load3 = #x00000000 (0)
i1 %cmp2 = #x1 (1)	[based on undef]
i32 %select2 = #x00000003 (3)	[based on undef]
ptr %__constexpr_1 = pointer(non-local, block_id=0, offset=12) / Address=#x80c
i32 %load4 = #x00000001 (1)
i1 %cmp3 = #x1 (1)	[based on undef]
i32 %select3 = #x00000003 (3)	[based on undef]
ptr %__constexpr_2 = pointer(non-local, block_id=0, offset=16) / Address=#x810
i32 %load5 = #x00010002 (65538)
i1 %cmp4 = #x0 (0)	[based on undef]
i32 %select4 = #x00010002 (65538)	[based on undef]
ptr %__constexpr_3 = pointer(non-local, block_id=0, offset=20) / Address=#x814
i32 %load6 = #x7fffffff (2147483647)
i1 %cmp5 = #x0 (0)
i32 %select5 = #x7fffffff (2147483647)
i32 %storeval = #x00000004 (4)	[based on undef]

SOURCE MEMORY STATE
===================
NON-LOCAL BLOCKS:
Block 0 >	size: 128	align: 16	alloc type: 0	alive: true	address: 2048
Contents:
5: #x7fffffff
4: #x00010002
0: #xc0000000
3: #x00000001
*: #x00000000

Block 1 >	size: 8	align: 8	alloc type: 0	alive: true	address: 8

Target:
<2 x i32> %vload = < #xc0000000 (3221225472, -1073741824), #x00000000 (0) >
i32 %elt1 = #xc0000000 (3221225472, -1073741824)
i1 %cmp1 = #x0 (0)
i32 %ex0 = #xc0000000 (3221225472, -1073741824)
i32 %select1 = #x7fffffff (2147483647)
ptr %__constexpr_0 = pointer(non-local, block_id=0, offset=8) / Address=#x808
<4 x i32> %#2 = < #x00000000 (0), #x00000001 (1), #x00010002 (65538), #x7fffffff (2147483647) >
i32 %#3 = #x7fffffff (2147483647)
i1 %op.rdx = #x0 (0)
i32 %op.rdx1 = #x00000000 (0)
i32 %storeval = #x00000004 (4)	[based on undef]
Source value: #x7fffffff (2147483647)
Target value: #x00000000 (0)

Pass: SLPVectorizerPass
Command line: '/home/nlopes/llvm/build/bin/opt' '-load=/home/nlopes/alive2/build/tv/tv.so' '-load-pass-plugin=/home/nlopes/alive2/build/tv/tv.so' '-tv-exit-on-error' '-mtriple=amdgcn-amd-amdhsa' '-mcpu=gfx900' '-passes=slp-vectorizer,dce' '-S' '-slp-threshold=-100' '-slp-vectorize-hor-store' '-tv-smt-to=20000' '-tv-report-dir=/home/nlopes/alive2/build/logs' '-tv-smt-stats'
Wrote bitcode to: "/home/nlopes/alive2/build/logs/in_IaLPesLN_nyrn.bc"


------------------- SMT STATS -------------------
Num queries: 36
Num invalid: 0
Num skips:   0
Num trivial: 57 (61.3%)
Num timeout: 1 (2.8%)
Num errors:  0 (0.0%)
Num SAT:     19 (52.8%)
Num UNSAT:   16 (44.4%)
Alive2: Transform doesn't verify; aborting!

stderr:

RUN: at line 2: /home/nlopes/alive2/build/opt-alive.sh < /bitbucket/nlopes/llvm/llvm/test/Transforms/SLPVectorizer/AMDGPU/horizontal-store.ll -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer,dce -S -slp-threshold=-100 -slp-vectorize-hor-store | /bitbucket/nlopes/llvm/build/bin/FileCheck /bitbucket/nlopes/llvm/llvm/test/Transforms/SLPVectorizer/AMDGPU/horizontal-store.ll --check-prefix=GFX9
+ /home/nlopes/alive2/build/opt-alive.sh -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer,dce -S -slp-threshold=-100 -slp-vectorize-hor-store
+ /bitbucket/nlopes/llvm/build/bin/FileCheck /bitbucket/nlopes/llvm/llvm/test/Transforms/SLPVectorizer/AMDGPU/horizontal-store.ll --check-prefix=GFX9

FileCheck error: '<stdin>' is empty.
FileCheck command line:  /bitbucket/nlopes/llvm/build/bin/FileCheck /bitbucket/nlopes/llvm/llvm/test/Transforms/SLPVectorizer/AMDGPU/horizontal-store.ll --check-prefix=GFX9

 

NOTE: This test would pass if undef didn't exist!

 

<-- Back