Test Failure: Transforms/LoopVectorize/runtime-checks-difference.ll

Test source: git

Log:

Source: /bitbucket/nlopes/llvm/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll
-- 1. ModuleToFunctionPassAdaptor
-- 1. PassManager<llvm::Function> : Skipping NOP
-- 2. LoopVectorizePass

----------------------------------------
define void @same_step_and_size(ptr %a, ptr %b, i64 %n) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.a = gep inbounds ptr %a, 4 x i64 %iv
  %l = load i32, ptr %gep.a, align 4
  %mul = mul nsw i32 %l, 3
  %gep.b = gep inbounds ptr %b, 4 x i64 %iv
  store i32 %mul, ptr %gep.b, align 4
  %iv.next = add nsw nuw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}
Transformation seems to be correct! (syntactically equal)

-- 3. LoopVectorizePass

----------------------------------------
define void @same_step_and_size(ptr %a, ptr %b, i64 %n) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.a = gep inbounds ptr %a, 4 x i64 %iv
  %l = load i32, ptr %gep.a, align 4
  %mul = mul nsw i32 %l, 3
  %gep.b = gep inbounds ptr %b, 4 x i64 %iv
  store i32 %mul, ptr %gep.b, align 4
  %iv.next = add nsw nuw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}
=>
define void @same_step_and_size(ptr %a, ptr %b, i64 %n) {
entry:
  %a2 = ptrtoint ptr %a to i64
  %b1 = ptrtoint ptr %b to i64
  %min.iters.check = icmp ult i64 %n, 4
  br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck

vector.memcheck:
  %#0 = sub i64 %b1, %a2
  %diff.check = icmp ult i64 %#0, 16
  br i1 %diff.check, label %scalar.ph, label %vector.ph

vector.ph:
  %n.mod.vf = urem i64 %n, 4
  %n.vec = sub i64 %n, %n.mod.vf
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %#1 = add i64 %index, 0
  %#2 = gep inbounds ptr %a, 4 x i64 %#1
  %#3 = gep inbounds ptr %#2, 4 x i32 0
  %wide.load = load <4 x i32>, ptr %#3, align 4
  %#4 = mul nsw <4 x i32> %wide.load, { 3, 3, 3, 3 }
  %#5 = gep inbounds ptr %b, 4 x i64 %#1
  %#6 = gep inbounds ptr %#5, 4 x i32 0
  store <4 x i32> %#4, ptr %#6, align 4
  %index.next = add nuw i64 %index, 4
  %#7 = icmp eq i64 %index.next, %n.vec
  br i1 %#7, label %middle.block, label %vector.body

middle.block:
  %cmp.n = icmp eq i64 %n, %n.vec
  br i1 %cmp.n, label %exit, label %scalar.ph

scalar.ph:
  %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %vector.memcheck ], [ 0, %entry ]
  br label %loop

loop:
  %iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %iv.next, %loop ]
  %gep.a = gep inbounds ptr %a, 4 x i64 %iv
  %l = load i32, ptr %gep.a, align 4
  %mul = mul nsw i32 %l, 3
  %gep.b = gep inbounds ptr %b, 4 x i64 %iv
  store i32 %mul, ptr %gep.b, align 4
  %iv.next = add nsw nuw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}
Transformation seems to be correct!

-- 4. PassManager<llvm::Function> : Skipping NOP
-- 5. PassManager<llvm::Function> : Skipping NOP
-- 6. LoopVectorizePass

----------------------------------------
define void @same_step_and_size_no_dominance_between_accesses(ptr %a, ptr %b, i64 %n, i64 %x) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %cmp = icmp ne i64 %iv, %x
  br i1 %cmp, label %then, label %else

then:
  %gep.a = gep inbounds ptr %a, 4 x i64 %iv
  store i32 0, ptr %gep.a, align 4
  br label %loop.latch

else:
  %gep.b = gep inbounds ptr %b, 4 x i64 %iv
  store i32 10, ptr %gep.b, align 4
  br label %loop.latch

loop.latch:
  %iv.next = add nsw nuw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}
Transformation seems to be correct! (syntactically equal)

-- 7. LoopVectorizePass

----------------------------------------
define void @same_step_and_size_no_dominance_between_accesses(ptr %a, ptr %b, i64 %n, i64 %x) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %cmp = icmp ne i64 %iv, %x
  br i1 %cmp, label %then, label %else

then:
  %gep.a = gep inbounds ptr %a, 4 x i64 %iv
  store i32 0, ptr %gep.a, align 4
  br label %loop.latch

else:
  %gep.b = gep inbounds ptr %b, 4 x i64 %iv
  store i32 10, ptr %gep.b, align 4
  br label %loop.latch

loop.latch:
  %iv.next = add nsw nuw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}
=>
define void @same_step_and_size_no_dominance_between_accesses(ptr %a, ptr %b, i64 %n, i64 %x) {
entry:
  %b2 = ptrtoint ptr %b to i64
  %a1 = ptrtoint ptr %a to i64
  %min.iters.check = icmp ult i64 %n, 4
  br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck

vector.memcheck:
  %#0 = sub i64 %a1, %b2
  %diff.check = icmp ult i64 %#0, 16
  br i1 %diff.check, label %scalar.ph, label %vector.ph

vector.ph:
  %n.mod.vf = urem i64 %n, 4
  %n.vec = sub i64 %n, %n.mod.vf
  %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
  %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, 0, 0, 0, 0
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %pred.store.continue16 ]
  %vec.ind = phi <4 x i64> [ { 0, 1, 2, 3 }, %vector.ph ], [ %vec.ind.next, %pred.store.continue16 ]
  %#1 = add i64 %index, 0
  %#2 = add i64 %index, 1
  %#3 = add i64 %index, 2
  %#4 = add i64 %index, 3
  %#5 = icmp ne <4 x i64> %vec.ind, %broadcast.splat
  %#6 = xor <4 x i1> %#5, { 1, 1, 1, 1 }
  %#7 = extractelement <4 x i1> %#6, i32 0
  br i1 %#7, label %pred.store.if, label %pred.store.continue

pred.store.if:
  %#8 = gep inbounds ptr %b, 4 x i64 %#1
  store i32 10, ptr %#8, align 4
  br label %pred.store.continue

pred.store.continue:
  %#9 = extractelement <4 x i1> %#6, i32 1
  br i1 %#9, label %pred.store.if3, label %pred.store.continue4

pred.store.if3:
  %#10 = gep inbounds ptr %b, 4 x i64 %#2
  store i32 10, ptr %#10, align 4
  br label %pred.store.continue4

pred.store.continue4:
  %#11 = extractelement <4 x i1> %#6, i32 2
  br i1 %#11, label %pred.store.if5, label %pred.store.continue6

pred.store.if5:
  %#12 = gep inbounds ptr %b, 4 x i64 %#3
  store i32 10, ptr %#12, align 4
  br label %pred.store.continue6

pred.store.continue6:
  %#13 = extractelement <4 x i1> %#6, i32 3
  br i1 %#13, label %pred.store.if7, label %pred.store.continue8

pred.store.if7:
  %#14 = gep inbounds ptr %b, 4 x i64 %#4
  store i32 10, ptr %#14, align 4
  br label %pred.store.continue8

pred.store.continue8:
  %#15 = extractelement <4 x i1> %#5, i32 0
  br i1 %#15, label %pred.store.if9, label %pred.store.continue10

pred.store.if9:
  %#16 = gep inbounds ptr %a, 4 x i64 %#1
  store i32 0, ptr %#16, align 4
  br label %pred.store.continue10

pred.store.continue10:
  %#17 = extractelement <4 x i1> %#5, i32 1
  br i1 %#17, label %pred.store.if11, label %pred.store.continue12

pred.store.if11:
  %#18 = gep inbounds ptr %a, 4 x i64 %#2
  store i32 0, ptr %#18, align 4
  br label %pred.store.continue12

pred.store.continue12:
  %#19 = extractelement <4 x i1> %#5, i32 2
  br i1 %#19, label %pred.store.if13, label %pred.store.continue14

pred.store.if13:
  %#20 = gep inbounds ptr %a, 4 x i64 %#3
  store i32 0, ptr %#20, align 4
  br label %pred.store.continue14

pred.store.continue14:
  %#21 = extractelement <4 x i1> %#5, i32 3
  br i1 %#21, label %pred.store.if15, label %pred.store.continue16

pred.store.if15:
  %#22 = gep inbounds ptr %a, 4 x i64 %#4
  store i32 0, ptr %#22, align 4
  br label %pred.store.continue16

pred.store.continue16:
  %index.next = add nuw i64 %index, 4
  %vec.ind.next = add <4 x i64> %vec.ind, { 4, 4, 4, 4 }
  %#23 = icmp eq i64 %index.next, %n.vec
  br i1 %#23, label %middle.block, label %vector.body

middle.block:
  %cmp.n = icmp eq i64 %n, %n.vec
  br i1 %cmp.n, label %exit, label %scalar.ph

scalar.ph:
  %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %vector.memcheck ], [ 0, %entry ]
  br label %loop

loop:
  %iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %iv.next, %loop.latch ]
  %cmp = icmp ne i64 %iv, %x
  br i1 %cmp, label %then, label %else

then:
  %gep.a = gep inbounds ptr %a, 4 x i64 %iv
  store i32 0, ptr %gep.a, align 4
  br label %loop.latch

else:
  %gep.b = gep inbounds ptr %b, 4 x i64 %iv
  store i32 10, ptr %gep.b, align 4
  br label %loop.latch

loop.latch:
  %iv.next = add nsw nuw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}
Transformation seems to be correct!

-- 8. PassManager<llvm::Function> : Skipping NOP
-- 9. PassManager<llvm::Function> : Skipping NOP
-- 10. LoopVectorizePass

----------------------------------------
define void @different_steps_and_different_access_sizes(ptr %a, ptr %b, i64 %n) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.a = gep inbounds ptr %a, 2 x i64 %iv
  %l = load i16, ptr %gep.a, align 2
  %l.ext = sext i16 %l to i32
  %mul = mul nsw i32 %l.ext, 3
  %gep.b = gep inbounds ptr %b, 4 x i64 %iv
  store i32 %mul, ptr %gep.b, align 4
  %iv.next = add nsw nuw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}
Transformation seems to be correct! (syntactically equal)

-- 11. LoopVectorizePass

----------------------------------------
define void @different_steps_and_different_access_sizes(ptr %a, ptr %b, i64 %n) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.a = gep inbounds ptr %a, 2 x i64 %iv
  %l = load i16, ptr %gep.a, align 2
  %l.ext = sext i16 %l to i32
  %mul = mul nsw i32 %l.ext, 3
  %gep.b = gep inbounds ptr %b, 4 x i64 %iv
  store i32 %mul, ptr %gep.b, align 4
  %iv.next = add nsw nuw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}
=>
define void @different_steps_and_different_access_sizes(ptr %a, ptr %b, i64 %n) {
entry:
  %min.iters.check = icmp ult i64 %n, 4
  br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck

vector.memcheck:
  %#0 = shl i64 %n, 2
  %scevgep = gep ptr %b, 1 x i64 %#0
  %#1 = shl i64 %n, 1
  %scevgep1 = gep ptr %a, 1 x i64 %#1
  %bound0 = icmp ult ptr %b, %scevgep1
  %bound1 = icmp ult ptr %a, %scevgep
  %found.conflict = and i1 %bound0, %bound1
  br i1 %found.conflict, label %scalar.ph, label %vector.ph

vector.ph:
  %n.mod.vf = urem i64 %n, 4
  %n.vec = sub i64 %n, %n.mod.vf
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %#2 = add i64 %index, 0
  %#3 = gep inbounds ptr %a, 2 x i64 %#2
  %#4 = gep inbounds ptr %#3, 2 x i32 0
  %wide.load = load <4 x i16>, ptr %#4, align 2
  %#5 = sext <4 x i16> %wide.load to <4 x i32>
  %#6 = mul nsw <4 x i32> %#5, { 3, 3, 3, 3 }
  %#7 = gep inbounds ptr %b, 4 x i64 %#2
  %#8 = gep inbounds ptr %#7, 4 x i32 0
  store <4 x i32> %#6, ptr %#8, align 4
  %index.next = add nuw i64 %index, 4
  %#9 = icmp eq i64 %index.next, %n.vec
  br i1 %#9, label %middle.block, label %vector.body

middle.block:
  %cmp.n = icmp eq i64 %n, %n.vec
  br i1 %cmp.n, label %exit, label %scalar.ph

scalar.ph:
  %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %vector.memcheck ], [ 0, %entry ]
  br label %loop

loop:
  %iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %iv.next, %loop ]
  %gep.a = gep inbounds ptr %a, 2 x i64 %iv
  %l = load i16, ptr %gep.a, align 2
  %l.ext = sext i16 %l to i32
  %mul = mul nsw i32 %l.ext, 3
  %gep.b = gep inbounds ptr %b, 4 x i64 %iv
  store i32 %mul, ptr %gep.b, align 4
  %iv.next = add nsw nuw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}
Transformation seems to be correct!

ERROR: Unsupported metadata: 7
-- 12. PassManager<llvm::Function> : Skipping NOP
ERROR: Unsupported metadata: 7
-- 13. PassManager<llvm::Function> : Skipping NOP
-- 14. LoopVectorizePass

----------------------------------------
define void @steps_match_but_different_access_sizes_1(ptr %a, ptr %b, i64 %n) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.a = gep inbounds ptr %a, 4 x i64 %iv, 2 x i64 1
  %l = load i16, ptr %gep.a, align 2
  %l.ext = sext i16 %l to i32
  %mul = mul nsw i32 %l.ext, 3
  %gep.b = gep inbounds ptr %b, 4 x i64 %iv
  store i32 %mul, ptr %gep.b, align 4
  %iv.next = add nsw nuw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}
Transformation seems to be correct! (syntactically equal)

-- 15. LoopVectorizePass

----------------------------------------
define void @steps_match_but_different_access_sizes_1(ptr %a, ptr %b, i64 %n) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.a = gep inbounds ptr %a, 4 x i64 %iv, 2 x i64 1
  %l = load i16, ptr %gep.a, align 2
  %l.ext = sext i16 %l to i32
  %mul = mul nsw i32 %l.ext, 3
  %gep.b = gep inbounds ptr %b, 4 x i64 %iv
  store i32 %mul, ptr %gep.b, align 4
  %iv.next = add nsw nuw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}
=>
define void @steps_match_but_different_access_sizes_1(ptr %a, ptr %b, i64 %n) {
entry:
  %a2 = ptrtoint ptr %a to i64
  %b1 = ptrtoint ptr %b to i64
  %min.iters.check = icmp ult i64 %n, 4
  br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck

vector.memcheck:
  %#0 = add i64 %b1, -2
  %#1 = sub i64 %#0, %a2
  %diff.check = icmp ult i64 %#1, 16
  br i1 %diff.check, label %scalar.ph, label %vector.ph

vector.ph:
  %n.mod.vf = urem i64 %n, 4
  %n.vec = sub i64 %n, %n.mod.vf
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %#2 = add i64 %index, 0
  %#3 = add i64 %index, 1
  %#4 = add i64 %index, 2
  %#5 = add i64 %index, 3
  %#6 = gep inbounds ptr %a, 4 x i64 %#2, 2 x i64 1
  %#7 = gep inbounds ptr %a, 4 x i64 %#3, 2 x i64 1
  %#8 = gep inbounds ptr %a, 4 x i64 %#4, 2 x i64 1
  %#9 = gep inbounds ptr %a, 4 x i64 %#5, 2 x i64 1
  %#10 = load i16, ptr %#6, align 2
  %#11 = load i16, ptr %#7, align 2
  %#12 = load i16, ptr %#8, align 2
  %#13 = load i16, ptr %#9, align 2
  %#14 = insertelement <4 x i16> poison, i16 %#10, i32 0
  %#15 = insertelement <4 x i16> %#14, i16 %#11, i32 1
  %#16 = insertelement <4 x i16> %#15, i16 %#12, i32 2
  %#17 = insertelement <4 x i16> %#16, i16 %#13, i32 3
  %#18 = sext <4 x i16> %#17 to <4 x i32>
  %#19 = mul nsw <4 x i32> %#18, { 3, 3, 3, 3 }
  %#20 = gep inbounds ptr %b, 4 x i64 %#2
  %#21 = gep inbounds ptr %#20, 4 x i32 0
  store <4 x i32> %#19, ptr %#21, align 4
  %index.next = add nuw i64 %index, 4
  %#22 = icmp eq i64 %index.next, %n.vec
  br i1 %#22, label %middle.block, label %vector.body

middle.block:
  %cmp.n = icmp eq i64 %n, %n.vec
  br i1 %cmp.n, label %exit, label %scalar.ph

scalar.ph:
  %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %vector.memcheck ], [ 0, %entry ]
  br label %loop

loop:
  %iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %iv.next, %loop ]
  %gep.a = gep inbounds ptr %a, 4 x i64 %iv, 2 x i64 1
  %l = load i16, ptr %gep.a, align 2
  %l.ext = sext i16 %l to i32
  %mul = mul nsw i32 %l.ext, 3
  %gep.b = gep inbounds ptr %b, 4 x i64 %iv
  store i32 %mul, ptr %gep.b, align 4
  %iv.next = add nsw nuw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}
Transformation seems to be correct!

-- 16. PassManager<llvm::Function> : Skipping NOP
-- 17. PassManager<llvm::Function> : Skipping NOP
-- 18. LoopVectorizePass

----------------------------------------
define void @steps_match_but_different_access_sizes_2(ptr %a, ptr %b, i64 %n) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.b = gep inbounds ptr %b, 4 x i64 %iv
  %l = load i32, ptr %gep.b, align 4
  %mul = mul nsw i32 %l, 3
  %gep.a = gep inbounds ptr %a, 4 x i64 %iv, 2 x i64 1
  %trunc = trunc i32 %mul to i16
  store i16 %trunc, ptr %gep.a, align 2
  %iv.next = add nsw nuw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}
Transformation seems to be correct! (syntactically equal)

-- 19. LoopVectorizePass

----------------------------------------
define void @steps_match_but_different_access_sizes_2(ptr %a, ptr %b, i64 %n) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.b = gep inbounds ptr %b, 4 x i64 %iv
  %l = load i32, ptr %gep.b, align 4
  %mul = mul nsw i32 %l, 3
  %gep.a = gep inbounds ptr %a, 4 x i64 %iv, 2 x i64 1
  %trunc = trunc i32 %mul to i16
  store i16 %trunc, ptr %gep.a, align 2
  %iv.next = add nsw nuw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}
=>
define void @steps_match_but_different_access_sizes_2(ptr %a, ptr %b, i64 %n) {
entry:
  %b2 = ptrtoint ptr %b to i64
  %a1 = ptrtoint ptr %a to i64
  %min.iters.check = icmp ult i64 %n, 4
  br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck

vector.memcheck:
  %#0 = add i64 %a1, 2
  %#1 = sub i64 %#0, %b2
  %diff.check = icmp ult i64 %#1, 16
  br i1 %diff.check, label %scalar.ph, label %vector.ph

vector.ph:
  %n.mod.vf = urem i64 %n, 4
  %n.vec = sub i64 %n, %n.mod.vf
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %#2 = add i64 %index, 0
  %#3 = add i64 %index, 1
  %#4 = add i64 %index, 2
  %#5 = add i64 %index, 3
  %#6 = gep inbounds ptr %b, 4 x i64 %#2
  %#7 = gep inbounds ptr %#6, 4 x i32 0
  %wide.load = load <4 x i32>, ptr %#7, align 4
  %#8 = mul nsw <4 x i32> %wide.load, { 3, 3, 3, 3 }
  %#9 = gep inbounds ptr %a, 4 x i64 %#2, 2 x i64 1
  %#10 = gep inbounds ptr %a, 4 x i64 %#3, 2 x i64 1
  %#11 = gep inbounds ptr %a, 4 x i64 %#4, 2 x i64 1
  %#12 = gep inbounds ptr %a, 4 x i64 %#5, 2 x i64 1
  %#13 = trunc <4 x i32> %#8 to <4 x i16>
  %#14 = extractelement <4 x i16> %#13, i32 0
  store i16 %#14, ptr %#9, align 2
  %#15 = extractelement <4 x i16> %#13, i32 1
  store i16 %#15, ptr %#10, align 2
  %#16 = extractelement <4 x i16> %#13, i32 2
  store i16 %#16, ptr %#11, align 2
  %#17 = extractelement <4 x i16> %#13, i32 3
  store i16 %#17, ptr %#12, align 2
  %index.next = add nuw i64 %index, 4
  %#18 = icmp eq i64 %index.next, %n.vec
  br i1 %#18, label %middle.block, label %vector.body

middle.block:
  %cmp.n = icmp eq i64 %n, %n.vec
  br i1 %cmp.n, label %exit, label %scalar.ph

scalar.ph:
  %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %vector.memcheck ], [ 0, %entry ]
  br label %loop

loop:
  %iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %iv.next, %loop ]
  %gep.b = gep inbounds ptr %b, 4 x i64 %iv
  %l = load i32, ptr %gep.b, align 4
  %mul = mul nsw i32 %l, 3
  %gep.a = gep inbounds ptr %a, 4 x i64 %iv, 2 x i64 1
  %trunc = trunc i32 %mul to i16
  store i16 %trunc, ptr %gep.a, align 2
  %iv.next = add nsw nuw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}
WARNING: slow vcgen! Took 6.38 s
Transformation seems to be correct!

-- 20. PassManager<llvm::Function> : Skipping NOP
-- 21. PassManager<llvm::Function> : Skipping NOP
-- 22. LoopVectorizePass

----------------------------------------
define void @nested_loop_outer_iv_addrec_invariant_in_inner1(ptr %a, ptr %b, i64 %n) {
entry:
  br label %outer.header

outer.header:
  %outer.iv = phi i64 [ %outer.iv.next, %outer.latch ], [ 0, %entry ]
  %gep.a = gep inbounds ptr %a, 4 x i64 %outer.iv
  br label %inner.body

inner.body:
  %inner.iv = phi i64 [ 0, %outer.header ], [ %inner.iv.next, %inner.body ]
  %gep.b = gep inbounds ptr %b, 4 x i64 %inner.iv
  %l = load i32, ptr %gep.b, align 4
  %sub = sub i32 %l, 10
  store i32 %sub, ptr %gep.a, align 4
  %inner.iv.next = add nsw nuw i64 %inner.iv, 1
  %inner.cond = icmp eq i64 %inner.iv.next, %n
  br i1 %inner.cond, label %outer.latch, label %inner.body

outer.latch:
  %outer.iv.next = add nsw nuw i64 %outer.iv, 1
  %outer.cond = icmp eq i64 %outer.iv.next, %n
  br i1 %outer.cond, label %exit, label %outer.header

exit:
  ret void
}
Transformation seems to be correct! (syntactically equal)

-- 23. LoopVectorizePass

----------------------------------------
define void @nested_loop_outer_iv_addrec_invariant_in_inner1(ptr %a, ptr %b, i64 %n) {
entry:
  br label %outer.header

outer.header:
  %outer.iv = phi i64 [ %outer.iv.next, %outer.latch ], [ 0, %entry ]
  %gep.a = gep inbounds ptr %a, 4 x i64 %outer.iv
  br label %inner.body

inner.body:
  %inner.iv = phi i64 [ 0, %outer.header ], [ %inner.iv.next, %inner.body ]
  %gep.b = gep inbounds ptr %b, 4 x i64 %inner.iv
  %l = load i32, ptr %gep.b, align 4
  %sub = sub i32 %l, 10
  store i32 %sub, ptr %gep.a, align 4
  %inner.iv.next = add nsw nuw i64 %inner.iv, 1
  %inner.cond = icmp eq i64 %inner.iv.next, %n
  br i1 %inner.cond, label %outer.latch, label %inner.body

outer.latch:
  %outer.iv.next = add nsw nuw i64 %outer.iv, 1
  %outer.cond = icmp eq i64 %outer.iv.next, %n
  br i1 %outer.cond, label %exit, label %outer.header

exit:
  ret void
}
=>
define void @nested_loop_outer_iv_addrec_invariant_in_inner1(ptr %a, ptr %b, i64 %n) {
entry:
  %#0 = shl i64 %n, 2
  %scevgep2 = gep ptr %b, 1 x i64 %#0
  br label %outer.header

outer.header:
  %outer.iv = phi i64 [ %outer.iv.next, %outer.latch ], [ 0, %entry ]
  %#1 = shl i64 %outer.iv, 2
  %scevgep = gep nuw ptr %a, 1 x i64 %#1
  %#2 = add i64 %#1, 4
  %scevgep1 = gep ptr %a, 1 x i64 %#2
  %gep.a = gep inbounds ptr %a, 4 x i64 %outer.iv
  %min.iters.check = icmp ult i64 %n, 4
  br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck

vector.memcheck:
  %bound0 = icmp ult ptr %scevgep, %scevgep2
  %bound1 = icmp ult ptr %b, %scevgep1
  %found.conflict = and i1 %bound0, %bound1
  br i1 %found.conflict, label %scalar.ph, label %vector.ph

vector.ph:
  %n.mod.vf = urem i64 %n, 4
  %n.vec = sub i64 %n, %n.mod.vf
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %#3 = add i64 %index, 0
  %#4 = gep inbounds ptr %b, 4 x i64 %#3
  %#5 = gep inbounds ptr %#4, 4 x i32 0
  %wide.load = load <4 x i32>, ptr %#5, align 4
  %#6 = sub <4 x i32> %wide.load, { 10, 10, 10, 10 }
  %#7 = extractelement <4 x i32> %#6, i32 3
  store i32 %#7, ptr %gep.a, align 4
  %index.next = add nuw i64 %index, 4
  %#8 = icmp eq i64 %index.next, %n.vec
  br i1 %#8, label %middle.block, label %vector.body

middle.block:
  %cmp.n = icmp eq i64 %n, %n.vec
  br i1 %cmp.n, label %outer.latch, label %scalar.ph

scalar.ph:
  %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %vector.memcheck ], [ 0, %outer.header ]
  br label %inner.body

inner.body:
  %inner.iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %inner.iv.next, %inner.body ]
  %gep.b = gep inbounds ptr %b, 4 x i64 %inner.iv
  %l = load i32, ptr %gep.b, align 4
  %sub = sub i32 %l, 10
  store i32 %sub, ptr %gep.a, align 4
  %inner.iv.next = add nsw nuw i64 %inner.iv, 1
  %inner.cond = icmp eq i64 %inner.iv.next, %n
  br i1 %inner.cond, label %outer.latch, label %inner.body

outer.latch:
  %outer.iv.next = add nsw nuw i64 %outer.iv, 1
  %outer.cond = icmp eq i64 %outer.iv.next, %n
  br i1 %outer.cond, label %exit, label %outer.header

exit:
  ret void
}
Transformation seems to be correct!

ERROR: Unsupported metadata: 7
-- 24. PassManager<llvm::Function> : Skipping NOP
ERROR: Unsupported metadata: 7
-- 25. PassManager<llvm::Function> : Skipping NOP
-- 26. LoopVectorizePass

----------------------------------------
define void @nested_loop_outer_iv_addrec_invariant_in_inner2(ptr %a, ptr %b, i64 %n) {
entry:
  br label %outer.header

outer.header:
  %outer.iv = phi i64 [ %outer.iv.next, %outer.latch ], [ 0, %entry ]
  %gep.a = gep inbounds ptr %a, 4 x i64 %outer.iv
  br label %inner.body

inner.body:
  %inner.iv = phi i64 [ 0, %outer.header ], [ %inner.iv.next, %inner.body ]
  %l = load i32, ptr %gep.a, align 4
  %sub = sub i32 %l, 10
  %gep.b = gep inbounds ptr %b, 4 x i64 %inner.iv
  store i32 %sub, ptr %gep.b, align 4
  %inner.iv.next = add nsw nuw i64 %inner.iv, 1
  %inner.cond = icmp eq i64 %inner.iv.next, %n
  br i1 %inner.cond, label %outer.latch, label %inner.body

outer.latch:
  %outer.iv.next = add nsw nuw i64 %outer.iv, 1
  %outer.cond = icmp eq i64 %outer.iv.next, %n
  br i1 %outer.cond, label %exit, label %outer.header

exit:
  ret void
}
Transformation seems to be correct! (syntactically equal)

-- 27. LoopVectorizePass

----------------------------------------
define void @nested_loop_outer_iv_addrec_invariant_in_inner2(ptr %a, ptr %b, i64 %n) {
entry:
  br label %outer.header

outer.header:
  %outer.iv = phi i64 [ %outer.iv.next, %outer.latch ], [ 0, %entry ]
  %gep.a = gep inbounds ptr %a, 4 x i64 %outer.iv
  br label %inner.body

inner.body:
  %inner.iv = phi i64 [ 0, %outer.header ], [ %inner.iv.next, %inner.body ]
  %l = load i32, ptr %gep.a, align 4
  %sub = sub i32 %l, 10
  %gep.b = gep inbounds ptr %b, 4 x i64 %inner.iv
  store i32 %sub, ptr %gep.b, align 4
  %inner.iv.next = add nsw nuw i64 %inner.iv, 1
  %inner.cond = icmp eq i64 %inner.iv.next, %n
  br i1 %inner.cond, label %outer.latch, label %inner.body

outer.latch:
  %outer.iv.next = add nsw nuw i64 %outer.iv, 1
  %outer.cond = icmp eq i64 %outer.iv.next, %n
  br i1 %outer.cond, label %exit, label %outer.header

exit:
  ret void
}
=>
define void @nested_loop_outer_iv_addrec_invariant_in_inner2(ptr %a, ptr %b, i64 %n) {
entry:
  %#0 = shl i64 %n, 2
  %scevgep = gep ptr %b, 1 x i64 %#0
  br label %outer.header

outer.header:
  %outer.iv = phi i64 [ %outer.iv.next, %outer.latch ], [ 0, %entry ]
  %#1 = shl i64 %outer.iv, 2
  %scevgep1 = gep nuw ptr %a, 1 x i64 %#1
  %#2 = add i64 %#1, 4
  %scevgep2 = gep ptr %a, 1 x i64 %#2
  %gep.a = gep inbounds ptr %a, 4 x i64 %outer.iv
  %min.iters.check = icmp ult i64 %n, 4
  br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck

vector.memcheck:
  %bound0 = icmp ult ptr %b, %scevgep2
  %bound1 = icmp ult ptr %scevgep1, %scevgep
  %found.conflict = and i1 %bound0, %bound1
  br i1 %found.conflict, label %scalar.ph, label %vector.ph

vector.ph:
  %n.mod.vf = urem i64 %n, 4
  %n.vec = sub i64 %n, %n.mod.vf
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %#3 = add i64 %index, 0
  %#4 = load i32, ptr %gep.a, align 4
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %#4, i64 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, 0, 0, 0, 0
  %#5 = sub <4 x i32> %broadcast.splat, { 10, 10, 10, 10 }
  %#6 = gep inbounds ptr %b, 4 x i64 %#3
  %#7 = gep inbounds ptr %#6, 4 x i32 0
  store <4 x i32> %#5, ptr %#7, align 4
  %index.next = add nuw i64 %index, 4
  %#8 = icmp eq i64 %index.next, %n.vec
  br i1 %#8, label %middle.block, label %vector.body

middle.block:
  %cmp.n = icmp eq i64 %n, %n.vec
  br i1 %cmp.n, label %outer.latch, label %scalar.ph

scalar.ph:
  %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %vector.memcheck ], [ 0, %outer.header ]
  br label %inner.body

inner.body:
  %inner.iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %inner.iv.next, %inner.body ]
  %l = load i32, ptr %gep.a, align 4
  %sub = sub i32 %l, 10
  %gep.b = gep inbounds ptr %b, 4 x i64 %inner.iv
  store i32 %sub, ptr %gep.b, align 4
  %inner.iv.next = add nsw nuw i64 %inner.iv, 1
  %inner.cond = icmp eq i64 %inner.iv.next, %n
  br i1 %inner.cond, label %outer.latch, label %inner.body

outer.latch:
  %outer.iv.next = add nsw nuw i64 %outer.iv, 1
  %outer.cond = icmp eq i64 %outer.iv.next, %n
  br i1 %outer.cond, label %exit, label %outer.header

exit:
  ret void
}
Transformation seems to be correct!

ERROR: Unsupported metadata: 7
-- 28. PassManager<llvm::Function> : Skipping NOP
ERROR: Unsupported metadata: 7
-- 29. PassManager<llvm::Function> : Skipping NOP
-- 30. LoopVectorizePass

----------------------------------------
define void @nested_loop_start_of_inner_ptr_addrec_is_same_outer_addrec(ptr nocapture noundef %dst, ptr nocapture nowrite noundef %src, i64 noundef %m, i64 noundef %n) {
entry:
  br label %outer.loop

outer.loop:
  %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %inner.exit ]
  %mul = mul nsw i64 %outer.iv, noundef %n
  br label %inner.loop

inner.loop:
  %iv.inner = phi i64 [ 0, %outer.loop ], [ %iv.inner.next, %inner.loop ]
  %idx = add nsw nuw i64 %iv.inner, %mul
  %gep.src = gep inbounds ptr nocapture nowrite noundef %src, 4 x i64 %idx
  %l = load i32, ptr %gep.src, align 4
  %gep.dst = gep inbounds ptr nocapture noundef %dst, 4 x i64 %idx
  %add = add nsw i32 %l, 10
  store i32 %add, ptr %gep.dst, align 4
  %iv.inner.next = add nsw nuw i64 %iv.inner, 1
  %inner.exit.cond = icmp eq i64 %iv.inner.next, noundef %n
  br i1 %inner.exit.cond, label %inner.exit, label %inner.loop

inner.exit:
  %outer.iv.next = add nsw nuw i64 %outer.iv, 1
  %outer.exit.cond = icmp eq i64 %outer.iv.next, noundef %m
  br i1 %outer.exit.cond, label %outer.exit, label %outer.loop

outer.exit:
  ret void
}
Transformation seems to be correct! (syntactically equal)

-- 31. LoopVectorizePass

----------------------------------------
define void @nested_loop_start_of_inner_ptr_addrec_is_same_outer_addrec(ptr nocapture noundef %dst, ptr nocapture nowrite noundef %src, i64 noundef %m, i64 noundef %n) {
entry:
  br label %outer.loop

outer.loop:
  %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %inner.exit ]
  %mul = mul nsw i64 %outer.iv, noundef %n
  br label %inner.loop

inner.loop:
  %iv.inner = phi i64 [ 0, %outer.loop ], [ %iv.inner.next, %inner.loop ]
  %idx = add nsw nuw i64 %iv.inner, %mul
  %gep.src = gep inbounds ptr nocapture nowrite noundef %src, 4 x i64 %idx
  %l = load i32, ptr %gep.src, align 4
  %gep.dst = gep inbounds ptr nocapture noundef %dst, 4 x i64 %idx
  %add = add nsw i32 %l, 10
  store i32 %add, ptr %gep.dst, align 4
  %iv.inner.next = add nsw nuw i64 %iv.inner, 1
  %inner.exit.cond = icmp eq i64 %iv.inner.next, noundef %n
  br i1 %inner.exit.cond, label %inner.exit, label %inner.loop

inner.exit:
  %outer.iv.next = add nsw nuw i64 %outer.iv, 1
  %outer.exit.cond = icmp eq i64 %outer.iv.next, noundef %m
  br i1 %outer.exit.cond, label %outer.exit, label %outer.loop

outer.exit:
  ret void
}
=>
define void @nested_loop_start_of_inner_ptr_addrec_is_same_outer_addrec(ptr nocapture noundef %dst, ptr nocapture nowrite noundef %src, i64 noundef %m, i64 noundef %n) {
entry:
  %src2 = ptrtoint ptr nocapture nowrite noundef %src to i64
  %dst1 = ptrtoint ptr nocapture noundef %dst to i64
  %#0 = sub i64 %dst1, %src2
  br label %outer.loop

outer.loop:
  %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %inner.exit ]
  %mul = mul nsw i64 %outer.iv, noundef %n
  %min.iters.check = icmp ult i64 noundef %n, 4
  br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck

vector.memcheck:
  %diff.check = icmp ult i64 %#0, 16
  br i1 %diff.check, label %scalar.ph, label %vector.ph

vector.ph:
  %n.mod.vf = urem i64 noundef %n, 4
  %n.vec = sub i64 noundef %n, %n.mod.vf
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %#1 = add i64 %index, 0
  %#2 = add nsw nuw i64 %#1, %mul
  %#3 = gep inbounds ptr nocapture nowrite noundef %src, 4 x i64 %#2
  %#4 = gep inbounds ptr %#3, 4 x i32 0
  %wide.load = load <4 x i32>, ptr %#4, align 4
  %#5 = gep inbounds ptr nocapture noundef %dst, 4 x i64 %#2
  %#6 = add nsw <4 x i32> %wide.load, { 10, 10, 10, 10 }
  %#7 = gep inbounds ptr %#5, 4 x i32 0
  store <4 x i32> %#6, ptr %#7, align 4
  %index.next = add nuw i64 %index, 4
  %#8 = icmp eq i64 %index.next, %n.vec
  br i1 %#8, label %middle.block, label %vector.body

middle.block:
  %cmp.n = icmp eq i64 noundef %n, %n.vec
  br i1 %cmp.n, label %inner.exit, label %scalar.ph

scalar.ph:
  %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %vector.memcheck ], [ 0, %outer.loop ]
  br label %inner.loop

inner.loop:
  %iv.inner = phi i64 [ %bc.resume.val, %scalar.ph ], [ %iv.inner.next, %inner.loop ]
  %idx = add nsw nuw i64 %iv.inner, %mul
  %gep.src = gep inbounds ptr nocapture nowrite noundef %src, 4 x i64 %idx
  %l = load i32, ptr %gep.src, align 4
  %gep.dst = gep inbounds ptr nocapture noundef %dst, 4 x i64 %idx
  %add = add nsw i32 %l, 10
  store i32 %add, ptr %gep.dst, align 4
  %iv.inner.next = add nsw nuw i64 %iv.inner, 1
  %inner.exit.cond = icmp eq i64 %iv.inner.next, noundef %n
  br i1 %inner.exit.cond, label %inner.exit, label %inner.loop

inner.exit:
  %outer.iv.next = add nsw nuw i64 %outer.iv, 1
  %outer.exit.cond = icmp eq i64 %outer.iv.next, noundef %m
  br i1 %outer.exit.cond, label %outer.exit, label %outer.loop

outer.exit:
  ret void
}
Transformation doesn't verify! (unsound)
ERROR: Source is more defined than target

Example:
ptr nocapture noundef %dst = pointer(non-local, block_id=1, offset=2, attrs=1) / Address=#x0000000000000004
ptr nocapture nowrite noundef %src = pointer(non-local, block_id=1, offset=2, attrs=3) / Address=#x0000000000000004
i64 noundef %m = #x0000000000000001 (1)
i64 noundef %n = #x0000000000000001 (1)

Source:
  >> Jump to %outer.loop
i64 %outer.iv = #x0000000000000000 (0)
i64 %mul = #x0000000000000000 (0)
  >> Jump to %inner.loop
i64 %iv.inner = #x0000000000000000 (0)
i64 %idx = #x0000000000000000 (0)
ptr %gep.src = pointer(non-local, block_id=1, offset=2, attrs=3) / Address=#x0000000000000004
i32 %l = poison
ptr %gep.dst = pointer(non-local, block_id=1, offset=2, attrs=1) / Address=#x0000000000000004
i32 %add = poison
i64 %iv.inner.next = #x0000000000000001 (1)
i1 %inner.exit.cond = #x1 (1)
  >> Jump to %inner.exit
i64 %outer.iv.next = #x0000000000000001 (1)
i1 %outer.exit.cond = #x1 (1)
  >> Jump to %outer.exit

SOURCE MEMORY STATE
===================
NON-LOCAL BLOCKS:
Block 0 >	size: 0	align: 4	alloc type: 0	alive: false	address: 0
Block 1 >	size: 9	align: 1	alloc type: 0	alive: true	address: 2
Block 2 >	size: 0	align: 2	alloc type: 0	alive: true	address: 2

Target:
i64 %src2 = UB triggered!


Pass: LoopVectorizePass
Command line: '/home/nlopes/llvm/build/bin/opt' '-load=/home/nlopes/alive2/build/tv/tv.so' '-load-pass-plugin=/home/nlopes/alive2/build/tv/tv.so' '-tv-exit-on-error' '/bitbucket/nlopes/llvm/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll' '-passes=loop-vectorize' '-hoist-runtime-checks=false' '-force-vector-width=4' '-force-vector-interleave=1' '-S' '-tv-smt-to=20000' '-tv-report-dir=/home/nlopes/alive2/build/logs' '-tv-smt-stats'
Wrote bitcode to: "/home/nlopes/alive2/build/logs/runtime-checks-difference_S4sKcQgZ_2ery.bc"


------------------- SMT STATS -------------------
Num queries: 148
Num invalid: 0
Num skips:   0
Num trivial: 35 (19.1%)
Num timeout: 0 (0.0%)
Num errors:  0 (0.0%)
Num SAT:     103 (69.6%)
Num UNSAT:   45 (30.4%)
Alive2: Transform doesn't verify; aborting!

stderr:

RUN: at line 1: /home/nlopes/alive2/build/opt-alive.sh /bitbucket/nlopes/llvm/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll -passes=loop-vectorize -hoist-runtime-checks=false -force-vector-width=4 -force-vector-interleave=1 -S | /bitbucket/nlopes/llvm/build/bin/FileCheck /bitbucket/nlopes/llvm/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll
+ /home/nlopes/alive2/build/opt-alive.sh /bitbucket/nlopes/llvm/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll -passes=loop-vectorize -hoist-runtime-checks=false -force-vector-width=4 -force-vector-interleave=1 -S
+ /bitbucket/nlopes/llvm/build/bin/FileCheck /bitbucket/nlopes/llvm/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll

FileCheck error: '<stdin>' is empty.
FileCheck command line:  /bitbucket/nlopes/llvm/build/bin/FileCheck /bitbucket/nlopes/llvm/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll

 

<-- Back