Test source: git
Source: /bitbucket/nlopes/llvm/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll
-- 1. ModuleToFunctionPassAdaptor
-- 1. PassManager<Function> : Skipping NOP
-- 2. LoopVectorizePass
----------------------------------------
define void @same_step_and_size(ptr %a, ptr %b, i64 %n) {
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.a = gep inbounds ptr %a, 4 x i64 %iv
%l = load i32, ptr %gep.a, align 4
%mul = mul nsw i32 %l, 3
%gep.b = gep inbounds ptr %b, 4 x i64 %iv
store i32 %mul, ptr %gep.b, align 4
%iv.next = add nsw nuw i64 %iv, 1
%exitcond = icmp eq i64 %iv.next, %n
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
Transformation seems to be correct! (syntactically equal)
-- 3. LoopVectorizePass
----------------------------------------
define void @same_step_and_size(ptr %a, ptr %b, i64 %n) {
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.a = gep inbounds ptr %a, 4 x i64 %iv
%l = load i32, ptr %gep.a, align 4
%mul = mul nsw i32 %l, 3
%gep.b = gep inbounds ptr %b, 4 x i64 %iv
store i32 %mul, ptr %gep.b, align 4
%iv.next = add nsw nuw i64 %iv, 1
%exitcond = icmp eq i64 %iv.next, %n
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
=>
define void @same_step_and_size(ptr %a, ptr %b, i64 %n) {
entry:
%a2 = ptrtoint ptr %a to i64
%b1 = ptrtoint ptr %b to i64
%min.iters.check = icmp ult i64 %n, 4
br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck
vector.memcheck:
%#0 = sub i64 %b1, %a2
%diff.check = icmp ult i64 %#0, 16
br i1 %diff.check, label %scalar.ph, label %vector.ph
vector.ph:
%n.mod.vf = urem i64 %n, 4
%n.vec = sub i64 %n, %n.mod.vf
br label %vector.body
vector.body:
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%#1 = add i64 %index, 0
%#2 = gep inbounds ptr %a, 4 x i64 %#1
%#3 = gep inbounds ptr %#2, 4 x i32 0
%wide.load = load <4 x i32>, ptr %#3, align 4
%#4 = mul nsw <4 x i32> %wide.load, { 3, 3, 3, 3 }
%#5 = gep inbounds ptr %b, 4 x i64 %#1
%#6 = gep inbounds ptr %#5, 4 x i32 0
store <4 x i32> %#4, ptr %#6, align 4
%index.next = add nuw i64 %index, 4
%#7 = icmp eq i64 %index.next, %n.vec
br i1 %#7, label %middle.block, label %vector.body
middle.block:
%cmp.n = icmp eq i64 %n, %n.vec
br i1 %cmp.n, label %exit, label %scalar.ph
scalar.ph:
%bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %entry ], [ 0, %vector.memcheck ]
br label %loop
loop:
%iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %iv.next, %loop ]
%gep.a = gep inbounds ptr %a, 4 x i64 %iv
%l = load i32, ptr %gep.a, align 4
%mul = mul nsw i32 %l, 3
%gep.b = gep inbounds ptr %b, 4 x i64 %iv
store i32 %mul, ptr %gep.b, align 4
%iv.next = add nsw nuw i64 %iv, 1
%exitcond = icmp eq i64 %iv.next, %n
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
Transformation seems to be correct!
-- 4. PassManager<Function> : Skipping NOP
-- 5. PassManager<Function> : Skipping NOP
-- 6. LoopVectorizePass
----------------------------------------
define void @same_step_and_size_no_dominance_between_accesses(ptr %a, ptr %b, i64 %n, i64 %x) {
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
%cmp = icmp ne i64 %iv, %x
br i1 %cmp, label %then, label %else
then:
%gep.a = gep inbounds ptr %a, 4 x i64 %iv
store i32 0, ptr %gep.a, align 4
br label %loop.latch
else:
%gep.b = gep inbounds ptr %b, 4 x i64 %iv
store i32 10, ptr %gep.b, align 4
br label %loop.latch
loop.latch:
%iv.next = add nsw nuw i64 %iv, 1
%exitcond = icmp eq i64 %iv.next, %n
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
Transformation seems to be correct! (syntactically equal)
-- 7. LoopVectorizePass
----------------------------------------
define void @same_step_and_size_no_dominance_between_accesses(ptr %a, ptr %b, i64 %n, i64 %x) {
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
%cmp = icmp ne i64 %iv, %x
br i1 %cmp, label %then, label %else
then:
%gep.a = gep inbounds ptr %a, 4 x i64 %iv
store i32 0, ptr %gep.a, align 4
br label %loop.latch
else:
%gep.b = gep inbounds ptr %b, 4 x i64 %iv
store i32 10, ptr %gep.b, align 4
br label %loop.latch
loop.latch:
%iv.next = add nsw nuw i64 %iv, 1
%exitcond = icmp eq i64 %iv.next, %n
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
=>
define void @same_step_and_size_no_dominance_between_accesses(ptr %a, ptr %b, i64 %n, i64 %x) {
entry:
%b2 = ptrtoint ptr %b to i64
%a1 = ptrtoint ptr %a to i64
%min.iters.check = icmp ult i64 %n, 4
br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck
vector.memcheck:
%#0 = sub i64 %a1, %b2
%diff.check = icmp ult i64 %#0, 16
br i1 %diff.check, label %scalar.ph, label %vector.ph
vector.ph:
%n.mod.vf = urem i64 %n, 4
%n.vec = sub i64 %n, %n.mod.vf
%broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
%broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, 0, 0, 0, 0
br label %vector.body
vector.body:
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %pred.store.continue16 ]
%vec.ind = phi <4 x i64> [ { 0, 1, 2, 3 }, %vector.ph ], [ %vec.ind.next, %pred.store.continue16 ]
%#1 = add i64 %index, 0
%#2 = add i64 %index, 1
%#3 = add i64 %index, 2
%#4 = add i64 %index, 3
%#5 = icmp ne <4 x i64> %vec.ind, %broadcast.splat
%#6 = xor <4 x i1> %#5, { 1, 1, 1, 1 }
%#7 = extractelement <4 x i1> %#6, i32 0
br i1 %#7, label %pred.store.if, label %pred.store.continue
pred.store.if:
%#8 = gep inbounds ptr %b, 4 x i64 %#1
store i32 10, ptr %#8, align 4
br label %pred.store.continue
pred.store.continue:
%#9 = extractelement <4 x i1> %#6, i32 1
br i1 %#9, label %pred.store.if3, label %pred.store.continue4
pred.store.if3:
%#10 = gep inbounds ptr %b, 4 x i64 %#2
store i32 10, ptr %#10, align 4
br label %pred.store.continue4
pred.store.continue4:
%#11 = extractelement <4 x i1> %#6, i32 2
br i1 %#11, label %pred.store.if5, label %pred.store.continue6
pred.store.if5:
%#12 = gep inbounds ptr %b, 4 x i64 %#3
store i32 10, ptr %#12, align 4
br label %pred.store.continue6
pred.store.continue6:
%#13 = extractelement <4 x i1> %#6, i32 3
br i1 %#13, label %pred.store.if7, label %pred.store.continue8
pred.store.if7:
%#14 = gep inbounds ptr %b, 4 x i64 %#4
store i32 10, ptr %#14, align 4
br label %pred.store.continue8
pred.store.continue8:
%#15 = extractelement <4 x i1> %#5, i32 0
br i1 %#15, label %pred.store.if9, label %pred.store.continue10
pred.store.if9:
%#16 = gep inbounds ptr %a, 4 x i64 %#1
store i32 0, ptr %#16, align 4
br label %pred.store.continue10
pred.store.continue10:
%#17 = extractelement <4 x i1> %#5, i32 1
br i1 %#17, label %pred.store.if11, label %pred.store.continue12
pred.store.if11:
%#18 = gep inbounds ptr %a, 4 x i64 %#2
store i32 0, ptr %#18, align 4
br label %pred.store.continue12
pred.store.continue12:
%#19 = extractelement <4 x i1> %#5, i32 2
br i1 %#19, label %pred.store.if13, label %pred.store.continue14
pred.store.if13:
%#20 = gep inbounds ptr %a, 4 x i64 %#3
store i32 0, ptr %#20, align 4
br label %pred.store.continue14
pred.store.continue14:
%#21 = extractelement <4 x i1> %#5, i32 3
br i1 %#21, label %pred.store.if15, label %pred.store.continue16
pred.store.if15:
%#22 = gep inbounds ptr %a, 4 x i64 %#4
store i32 0, ptr %#22, align 4
br label %pred.store.continue16
pred.store.continue16:
%index.next = add nuw i64 %index, 4
%vec.ind.next = add <4 x i64> %vec.ind, { 4, 4, 4, 4 }
%#23 = icmp eq i64 %index.next, %n.vec
br i1 %#23, label %middle.block, label %vector.body
middle.block:
%cmp.n = icmp eq i64 %n, %n.vec
br i1 %cmp.n, label %exit, label %scalar.ph
scalar.ph:
%bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %entry ], [ 0, %vector.memcheck ]
br label %loop
loop:
%iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %iv.next, %loop.latch ]
%cmp = icmp ne i64 %iv, %x
br i1 %cmp, label %then, label %else
then:
%gep.a = gep inbounds ptr %a, 4 x i64 %iv
store i32 0, ptr %gep.a, align 4
br label %loop.latch
else:
%gep.b = gep inbounds ptr %b, 4 x i64 %iv
store i32 10, ptr %gep.b, align 4
br label %loop.latch
loop.latch:
%iv.next = add nsw nuw i64 %iv, 1
%exitcond = icmp eq i64 %iv.next, %n
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
Transformation doesn't verify! (not unsound)
ERROR: Timeout
-- 8. PassManager<Function> : Skipping NOP
-- 9. PassManager<Function> : Skipping NOP
-- 10. LoopVectorizePass
----------------------------------------
define void @different_steps_and_different_access_sizes(ptr %a, ptr %b, i64 %n) {
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.a = gep inbounds ptr %a, 2 x i64 %iv
%l = load i16, ptr %gep.a, align 2
%l.ext = sext i16 %l to i32
%mul = mul nsw i32 %l.ext, 3
%gep.b = gep inbounds ptr %b, 4 x i64 %iv
store i32 %mul, ptr %gep.b, align 4
%iv.next = add nsw nuw i64 %iv, 1
%exitcond = icmp eq i64 %iv.next, %n
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
Transformation seems to be correct! (syntactically equal)
-- 11. LoopVectorizePass
----------------------------------------
define void @different_steps_and_different_access_sizes(ptr %a, ptr %b, i64 %n) {
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.a = gep inbounds ptr %a, 2 x i64 %iv
%l = load i16, ptr %gep.a, align 2
%l.ext = sext i16 %l to i32
%mul = mul nsw i32 %l.ext, 3
%gep.b = gep inbounds ptr %b, 4 x i64 %iv
store i32 %mul, ptr %gep.b, align 4
%iv.next = add nsw nuw i64 %iv, 1
%exitcond = icmp eq i64 %iv.next, %n
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
=>
define void @different_steps_and_different_access_sizes(ptr %a, ptr %b, i64 %n) {
entry:
%min.iters.check = icmp ult i64 %n, 4
br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck
vector.memcheck:
%#0 = shl i64 %n, 2
%scevgep = gep ptr %b, 1 x i64 %#0
%#1 = shl i64 %n, 1
%scevgep1 = gep ptr %a, 1 x i64 %#1
%bound0 = icmp ult ptr %b, %scevgep1
%bound1 = icmp ult ptr %a, %scevgep
%found.conflict = and i1 %bound0, %bound1
br i1 %found.conflict, label %scalar.ph, label %vector.ph
vector.ph:
%n.mod.vf = urem i64 %n, 4
%n.vec = sub i64 %n, %n.mod.vf
br label %vector.body
vector.body:
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%#2 = add i64 %index, 0
%#3 = gep inbounds ptr %a, 2 x i64 %#2
%#4 = gep inbounds ptr %#3, 2 x i32 0
%wide.load = load <4 x i16>, ptr %#4, align 2
%#5 = sext <4 x i16> %wide.load to <4 x i32>
%#6 = mul nsw <4 x i32> %#5, { 3, 3, 3, 3 }
%#7 = gep inbounds ptr %b, 4 x i64 %#2
%#8 = gep inbounds ptr %#7, 4 x i32 0
store <4 x i32> %#6, ptr %#8, align 4
%index.next = add nuw i64 %index, 4
%#9 = icmp eq i64 %index.next, %n.vec
br i1 %#9, label %middle.block, label %vector.body
middle.block:
%cmp.n = icmp eq i64 %n, %n.vec
br i1 %cmp.n, label %exit, label %scalar.ph
scalar.ph:
%bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %entry ], [ 0, %vector.memcheck ]
br label %loop
loop:
%iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %iv.next, %loop ]
%gep.a = gep inbounds ptr %a, 2 x i64 %iv
%l = load i16, ptr %gep.a, align 2
%l.ext = sext i16 %l to i32
%mul = mul nsw i32 %l.ext, 3
%gep.b = gep inbounds ptr %b, 4 x i64 %iv
store i32 %mul, ptr %gep.b, align 4
%iv.next = add nsw nuw i64 %iv, 1
%exitcond = icmp eq i64 %iv.next, %n
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
Transformation seems to be correct!
ERROR: Unsupported metadata: 7
-- 12. PassManager<Function> : Skipping NOP
ERROR: Unsupported metadata: 7
-- 13. PassManager<Function> : Skipping NOP
-- 14. LoopVectorizePass
----------------------------------------
define void @steps_match_but_different_access_sizes_1(ptr %a, ptr %b, i64 %n) {
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.a = gep inbounds ptr %a, 4 x i64 %iv, 2 x i64 1
%l = load i16, ptr %gep.a, align 2
%l.ext = sext i16 %l to i32
%mul = mul nsw i32 %l.ext, 3
%gep.b = gep inbounds ptr %b, 4 x i64 %iv
store i32 %mul, ptr %gep.b, align 4
%iv.next = add nsw nuw i64 %iv, 1
%exitcond = icmp eq i64 %iv.next, %n
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
Transformation seems to be correct! (syntactically equal)
-- 15. LoopVectorizePass
----------------------------------------
define void @steps_match_but_different_access_sizes_1(ptr %a, ptr %b, i64 %n) {
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.a = gep inbounds ptr %a, 4 x i64 %iv, 2 x i64 1
%l = load i16, ptr %gep.a, align 2
%l.ext = sext i16 %l to i32
%mul = mul nsw i32 %l.ext, 3
%gep.b = gep inbounds ptr %b, 4 x i64 %iv
store i32 %mul, ptr %gep.b, align 4
%iv.next = add nsw nuw i64 %iv, 1
%exitcond = icmp eq i64 %iv.next, %n
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
=>
define void @steps_match_but_different_access_sizes_1(ptr %a, ptr %b, i64 %n) {
entry:
%a2 = ptrtoint ptr %a to i64
%b1 = ptrtoint ptr %b to i64
%min.iters.check = icmp ult i64 %n, 4
br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck
vector.memcheck:
%#0 = add i64 %b1, -2
%#1 = sub i64 %#0, %a2
%diff.check = icmp ult i64 %#1, 16
br i1 %diff.check, label %scalar.ph, label %vector.ph
vector.ph:
%n.mod.vf = urem i64 %n, 4
%n.vec = sub i64 %n, %n.mod.vf
br label %vector.body
vector.body:
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%#2 = add i64 %index, 0
%#3 = add i64 %index, 1
%#4 = add i64 %index, 2
%#5 = add i64 %index, 3
%#6 = gep inbounds ptr %a, 4 x i64 %#2, 2 x i64 1
%#7 = gep inbounds ptr %a, 4 x i64 %#3, 2 x i64 1
%#8 = gep inbounds ptr %a, 4 x i64 %#4, 2 x i64 1
%#9 = gep inbounds ptr %a, 4 x i64 %#5, 2 x i64 1
%#10 = load i16, ptr %#6, align 2
%#11 = load i16, ptr %#7, align 2
%#12 = load i16, ptr %#8, align 2
%#13 = load i16, ptr %#9, align 2
%#14 = insertelement <4 x i16> poison, i16 %#10, i32 0
%#15 = insertelement <4 x i16> %#14, i16 %#11, i32 1
%#16 = insertelement <4 x i16> %#15, i16 %#12, i32 2
%#17 = insertelement <4 x i16> %#16, i16 %#13, i32 3
%#18 = sext <4 x i16> %#17 to <4 x i32>
%#19 = mul nsw <4 x i32> %#18, { 3, 3, 3, 3 }
%#20 = gep inbounds ptr %b, 4 x i64 %#2
%#21 = gep inbounds ptr %#20, 4 x i32 0
store <4 x i32> %#19, ptr %#21, align 4
%index.next = add nuw i64 %index, 4
%#22 = icmp eq i64 %index.next, %n.vec
br i1 %#22, label %middle.block, label %vector.body
middle.block:
%cmp.n = icmp eq i64 %n, %n.vec
br i1 %cmp.n, label %exit, label %scalar.ph
scalar.ph:
%bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %entry ], [ 0, %vector.memcheck ]
br label %loop
loop:
%iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %iv.next, %loop ]
%gep.a = gep inbounds ptr %a, 4 x i64 %iv, 2 x i64 1
%l = load i16, ptr %gep.a, align 2
%l.ext = sext i16 %l to i32
%mul = mul nsw i32 %l.ext, 3
%gep.b = gep inbounds ptr %b, 4 x i64 %iv
store i32 %mul, ptr %gep.b, align 4
%iv.next = add nsw nuw i64 %iv, 1
%exitcond = icmp eq i64 %iv.next, %n
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
Transformation doesn't verify! (not unsound)
ERROR: Timeout
-- 16. PassManager<Function> : Skipping NOP
-- 17. PassManager<Function> : Skipping NOP
-- 18. LoopVectorizePass
----------------------------------------
define void @steps_match_but_different_access_sizes_2(ptr %a, ptr %b, i64 %n) {
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.b = gep inbounds ptr %b, 4 x i64 %iv
%l = load i32, ptr %gep.b, align 4
%mul = mul nsw i32 %l, 3
%gep.a = gep inbounds ptr %a, 4 x i64 %iv, 2 x i64 1
%trunc = trunc i32 %mul to i16
store i16 %trunc, ptr %gep.a, align 2
%iv.next = add nsw nuw i64 %iv, 1
%exitcond = icmp eq i64 %iv.next, %n
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
Transformation seems to be correct! (syntactically equal)
-- 19. LoopVectorizePass
----------------------------------------
define void @steps_match_but_different_access_sizes_2(ptr %a, ptr %b, i64 %n) {
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.b = gep inbounds ptr %b, 4 x i64 %iv
%l = load i32, ptr %gep.b, align 4
%mul = mul nsw i32 %l, 3
%gep.a = gep inbounds ptr %a, 4 x i64 %iv, 2 x i64 1
%trunc = trunc i32 %mul to i16
store i16 %trunc, ptr %gep.a, align 2
%iv.next = add nsw nuw i64 %iv, 1
%exitcond = icmp eq i64 %iv.next, %n
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
=>
define void @steps_match_but_different_access_sizes_2(ptr %a, ptr %b, i64 %n) {
entry:
%b2 = ptrtoint ptr %b to i64
%a1 = ptrtoint ptr %a to i64
%min.iters.check = icmp ult i64 %n, 4
br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck
vector.memcheck:
%#0 = add i64 %a1, 2
%#1 = sub i64 %#0, %b2
%diff.check = icmp ult i64 %#1, 16
br i1 %diff.check, label %scalar.ph, label %vector.ph
vector.ph:
%n.mod.vf = urem i64 %n, 4
%n.vec = sub i64 %n, %n.mod.vf
br label %vector.body
vector.body:
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%#2 = add i64 %index, 0
%#3 = add i64 %index, 1
%#4 = add i64 %index, 2
%#5 = add i64 %index, 3
%#6 = gep inbounds ptr %b, 4 x i64 %#2
%#7 = gep inbounds ptr %#6, 4 x i32 0
%wide.load = load <4 x i32>, ptr %#7, align 4
%#8 = mul nsw <4 x i32> %wide.load, { 3, 3, 3, 3 }
%#9 = gep inbounds ptr %a, 4 x i64 %#2, 2 x i64 1
%#10 = gep inbounds ptr %a, 4 x i64 %#3, 2 x i64 1
%#11 = gep inbounds ptr %a, 4 x i64 %#4, 2 x i64 1
%#12 = gep inbounds ptr %a, 4 x i64 %#5, 2 x i64 1
%#13 = trunc <4 x i32> %#8 to <4 x i16>
%#14 = extractelement <4 x i16> %#13, i32 0
store i16 %#14, ptr %#9, align 2
%#15 = extractelement <4 x i16> %#13, i32 1
store i16 %#15, ptr %#10, align 2
%#16 = extractelement <4 x i16> %#13, i32 2
store i16 %#16, ptr %#11, align 2
%#17 = extractelement <4 x i16> %#13, i32 3
store i16 %#17, ptr %#12, align 2
%index.next = add nuw i64 %index, 4
%#18 = icmp eq i64 %index.next, %n.vec
br i1 %#18, label %middle.block, label %vector.body
middle.block:
%cmp.n = icmp eq i64 %n, %n.vec
br i1 %cmp.n, label %exit, label %scalar.ph
scalar.ph:
%bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %entry ], [ 0, %vector.memcheck ]
br label %loop
loop:
%iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %iv.next, %loop ]
%gep.b = gep inbounds ptr %b, 4 x i64 %iv
%l = load i32, ptr %gep.b, align 4
%mul = mul nsw i32 %l, 3
%gep.a = gep inbounds ptr %a, 4 x i64 %iv, 2 x i64 1
%trunc = trunc i32 %mul to i16
store i16 %trunc, ptr %gep.a, align 2
%iv.next = add nsw nuw i64 %iv, 1
%exitcond = icmp eq i64 %iv.next, %n
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
Transformation doesn't verify! (not unsound)
ERROR: Timeout
-- 20. PassManager<Function> : Skipping NOP
-- 21. PassManager<Function> : Skipping NOP
-- 22. LoopVectorizePass
----------------------------------------
define void @nested_loop_outer_iv_addrec_invariant_in_inner1(ptr %a, ptr %b, i64 %n) {
entry:
br label %outer.header
outer.header:
%outer.iv = phi i64 [ %outer.iv.next, %outer.latch ], [ 0, %entry ]
%gep.a = gep inbounds ptr %a, 4 x i64 %outer.iv
br label %inner.body
inner.body:
%inner.iv = phi i64 [ 0, %outer.header ], [ %inner.iv.next, %inner.body ]
%gep.b = gep inbounds ptr %b, 4 x i64 %inner.iv
%l = load i32, ptr %gep.b, align 4
%sub = sub i32 %l, 10
store i32 %sub, ptr %gep.a, align 4
%inner.iv.next = add nsw nuw i64 %inner.iv, 1
%inner.cond = icmp eq i64 %inner.iv.next, %n
br i1 %inner.cond, label %outer.latch, label %inner.body
outer.latch:
%outer.iv.next = add nsw nuw i64 %outer.iv, 1
%outer.cond = icmp eq i64 %outer.iv.next, %n
br i1 %outer.cond, label %exit, label %outer.header
exit:
ret void
}
Transformation seems to be correct! (syntactically equal)
-- 23. LoopVectorizePass
----------------------------------------
define void @nested_loop_outer_iv_addrec_invariant_in_inner1(ptr %a, ptr %b, i64 %n) {
entry:
br label %outer.header
outer.header:
%outer.iv = phi i64 [ %outer.iv.next, %outer.latch ], [ 0, %entry ]
%gep.a = gep inbounds ptr %a, 4 x i64 %outer.iv
br label %inner.body
inner.body:
%inner.iv = phi i64 [ 0, %outer.header ], [ %inner.iv.next, %inner.body ]
%gep.b = gep inbounds ptr %b, 4 x i64 %inner.iv
%l = load i32, ptr %gep.b, align 4
%sub = sub i32 %l, 10
store i32 %sub, ptr %gep.a, align 4
%inner.iv.next = add nsw nuw i64 %inner.iv, 1
%inner.cond = icmp eq i64 %inner.iv.next, %n
br i1 %inner.cond, label %outer.latch, label %inner.body
outer.latch:
%outer.iv.next = add nsw nuw i64 %outer.iv, 1
%outer.cond = icmp eq i64 %outer.iv.next, %n
br i1 %outer.cond, label %exit, label %outer.header
exit:
ret void
}
=>
define void @nested_loop_outer_iv_addrec_invariant_in_inner1(ptr %a, ptr %b, i64 %n) {
entry:
%#0 = shl i64 %n, 2
%scevgep2 = gep ptr %b, 1 x i64 %#0
br label %outer.header
outer.header:
%outer.iv = phi i64 [ %outer.iv.next, %outer.latch ], [ 0, %entry ]
%#1 = shl i64 %outer.iv, 2
%scevgep = gep ptr %a, 1 x i64 %#1
%#2 = add i64 %#1, 4
%scevgep1 = gep ptr %a, 1 x i64 %#2
%gep.a = gep inbounds ptr %a, 4 x i64 %outer.iv
%min.iters.check = icmp ult i64 %n, 4
br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck
vector.memcheck:
%bound0 = icmp ult ptr %scevgep, %scevgep2
%bound1 = icmp ult ptr %b, %scevgep1
%found.conflict = and i1 %bound0, %bound1
br i1 %found.conflict, label %scalar.ph, label %vector.ph
vector.ph:
%n.mod.vf = urem i64 %n, 4
%n.vec = sub i64 %n, %n.mod.vf
br label %vector.body
vector.body:
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%#3 = add i64 %index, 0
%#4 = gep inbounds ptr %b, 4 x i64 %#3
%#5 = gep inbounds ptr %#4, 4 x i32 0
%wide.load = load <4 x i32>, ptr %#5, align 4
%#6 = sub <4 x i32> %wide.load, { 10, 10, 10, 10 }
%#7 = extractelement <4 x i32> %#6, i32 3
store i32 %#7, ptr %gep.a, align 4
%index.next = add nuw i64 %index, 4
%#8 = icmp eq i64 %index.next, %n.vec
br i1 %#8, label %middle.block, label %vector.body
middle.block:
%cmp.n = icmp eq i64 %n, %n.vec
br i1 %cmp.n, label %outer.latch, label %scalar.ph
scalar.ph:
%bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %outer.header ], [ 0, %vector.memcheck ]
br label %inner.body
inner.body:
%inner.iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %inner.iv.next, %inner.body ]
%gep.b = gep inbounds ptr %b, 4 x i64 %inner.iv
%l = load i32, ptr %gep.b, align 4
%sub = sub i32 %l, 10
store i32 %sub, ptr %gep.a, align 4
%inner.iv.next = add nsw nuw i64 %inner.iv, 1
%inner.cond = icmp eq i64 %inner.iv.next, %n
br i1 %inner.cond, label %outer.latch, label %inner.body
outer.latch:
%outer.iv.next = add nsw nuw i64 %outer.iv, 1
%outer.cond = icmp eq i64 %outer.iv.next, %n
br i1 %outer.cond, label %exit, label %outer.header
exit:
ret void
}
Transformation seems to be correct!
ERROR: Unsupported metadata: 7
-- 24. PassManager<Function> : Skipping NOP
ERROR: Unsupported metadata: 7
-- 25. PassManager<Function> : Skipping NOP
-- 26. LoopVectorizePass
----------------------------------------
define void @nested_loop_outer_iv_addrec_invariant_in_inner2(ptr %a, ptr %b, i64 %n) {
entry:
br label %outer.header
outer.header:
%outer.iv = phi i64 [ %outer.iv.next, %outer.latch ], [ 0, %entry ]
%gep.a = gep inbounds ptr %a, 4 x i64 %outer.iv
br label %inner.body
inner.body:
%inner.iv = phi i64 [ 0, %outer.header ], [ %inner.iv.next, %inner.body ]
%l = load i32, ptr %gep.a, align 4
%sub = sub i32 %l, 10
%gep.b = gep inbounds ptr %b, 4 x i64 %inner.iv
store i32 %sub, ptr %gep.b, align 4
%inner.iv.next = add nsw nuw i64 %inner.iv, 1
%inner.cond = icmp eq i64 %inner.iv.next, %n
br i1 %inner.cond, label %outer.latch, label %inner.body
outer.latch:
%outer.iv.next = add nsw nuw i64 %outer.iv, 1
%outer.cond = icmp eq i64 %outer.iv.next, %n
br i1 %outer.cond, label %exit, label %outer.header
exit:
ret void
}
Transformation seems to be correct! (syntactically equal)
-- 27. LoopVectorizePass
----------------------------------------
define void @nested_loop_outer_iv_addrec_invariant_in_inner2(ptr %a, ptr %b, i64 %n) {
entry:
br label %outer.header
outer.header:
%outer.iv = phi i64 [ %outer.iv.next, %outer.latch ], [ 0, %entry ]
%gep.a = gep inbounds ptr %a, 4 x i64 %outer.iv
br label %inner.body
inner.body:
%inner.iv = phi i64 [ 0, %outer.header ], [ %inner.iv.next, %inner.body ]
%l = load i32, ptr %gep.a, align 4
%sub = sub i32 %l, 10
%gep.b = gep inbounds ptr %b, 4 x i64 %inner.iv
store i32 %sub, ptr %gep.b, align 4
%inner.iv.next = add nsw nuw i64 %inner.iv, 1
%inner.cond = icmp eq i64 %inner.iv.next, %n
br i1 %inner.cond, label %outer.latch, label %inner.body
outer.latch:
%outer.iv.next = add nsw nuw i64 %outer.iv, 1
%outer.cond = icmp eq i64 %outer.iv.next, %n
br i1 %outer.cond, label %exit, label %outer.header
exit:
ret void
}
=>
define void @nested_loop_outer_iv_addrec_invariant_in_inner2(ptr %a, ptr %b, i64 %n) {
entry:
%#0 = shl i64 %n, 2
%scevgep = gep ptr %b, 1 x i64 %#0
br label %outer.header
outer.header:
%outer.iv = phi i64 [ %outer.iv.next, %outer.latch ], [ 0, %entry ]
%#1 = shl i64 %outer.iv, 2
%scevgep1 = gep ptr %a, 1 x i64 %#1
%#2 = add i64 %#1, 4
%scevgep2 = gep ptr %a, 1 x i64 %#2
%gep.a = gep inbounds ptr %a, 4 x i64 %outer.iv
%min.iters.check = icmp ult i64 %n, 4
br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck
vector.memcheck:
%bound0 = icmp ult ptr %b, %scevgep2
%bound1 = icmp ult ptr %scevgep1, %scevgep
%found.conflict = and i1 %bound0, %bound1
br i1 %found.conflict, label %scalar.ph, label %vector.ph
vector.ph:
%n.mod.vf = urem i64 %n, 4
%n.vec = sub i64 %n, %n.mod.vf
br label %vector.body
vector.body:
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%#3 = add i64 %index, 0
%#4 = load i32, ptr %gep.a, align 4
%broadcast.splatinsert = insertelement <4 x i32> poison, i32 %#4, i64 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, 0, 0, 0, 0
%#5 = sub <4 x i32> %broadcast.splat, { 10, 10, 10, 10 }
%#6 = gep inbounds ptr %b, 4 x i64 %#3
%#7 = gep inbounds ptr %#6, 4 x i32 0
store <4 x i32> %#5, ptr %#7, align 4
%index.next = add nuw i64 %index, 4
%#8 = icmp eq i64 %index.next, %n.vec
br i1 %#8, label %middle.block, label %vector.body
middle.block:
%cmp.n = icmp eq i64 %n, %n.vec
br i1 %cmp.n, label %outer.latch, label %scalar.ph
scalar.ph:
%bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %outer.header ], [ 0, %vector.memcheck ]
br label %inner.body
inner.body:
%inner.iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %inner.iv.next, %inner.body ]
%l = load i32, ptr %gep.a, align 4
%sub = sub i32 %l, 10
%gep.b = gep inbounds ptr %b, 4 x i64 %inner.iv
store i32 %sub, ptr %gep.b, align 4
%inner.iv.next = add nsw nuw i64 %inner.iv, 1
%inner.cond = icmp eq i64 %inner.iv.next, %n
br i1 %inner.cond, label %outer.latch, label %inner.body
outer.latch:
%outer.iv.next = add nsw nuw i64 %outer.iv, 1
%outer.cond = icmp eq i64 %outer.iv.next, %n
br i1 %outer.cond, label %exit, label %outer.header
exit:
ret void
}
Transformation doesn't verify! (not unsound)
ERROR: Timeout
ERROR: Unsupported metadata: 7
-- 28. PassManager<Function> : Skipping NOP
ERROR: Unsupported metadata: 7
-- 29. PassManager<Function> : Skipping NOP
-- 30. LoopVectorizePass
----------------------------------------
define void @nested_loop_start_of_inner_ptr_addrec_is_same_outer_addrec(ptr nocapture noundef %dst, ptr nocapture nowrite noundef %src, i64 noundef %m, i64 noundef %n) {
entry:
br label %outer.loop
outer.loop:
%outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %inner.exit ]
%mul = mul nsw i64 %outer.iv, noundef %n
br label %inner.loop
inner.loop:
%iv.inner = phi i64 [ 0, %outer.loop ], [ %iv.inner.next, %inner.loop ]
%idx = add nsw nuw i64 %iv.inner, %mul
%gep.src = gep inbounds ptr nocapture nowrite noundef %src, 4 x i64 %idx
%l = load i32, ptr %gep.src, align 4
%gep.dst = gep inbounds ptr nocapture noundef %dst, 4 x i64 %idx
%add = add nsw i32 %l, 10
store i32 %add, ptr %gep.dst, align 4
%iv.inner.next = add nsw nuw i64 %iv.inner, 1
%inner.exit.cond = icmp eq i64 %iv.inner.next, noundef %n
br i1 %inner.exit.cond, label %inner.exit, label %inner.loop
inner.exit:
%outer.iv.next = add nsw nuw i64 %outer.iv, 1
%outer.exit.cond = icmp eq i64 %outer.iv.next, noundef %m
br i1 %outer.exit.cond, label %outer.exit, label %outer.loop
outer.exit:
ret void
}
Transformation seems to be correct! (syntactically equal)
-- 31. LoopVectorizePass
----------------------------------------
define void @nested_loop_start_of_inner_ptr_addrec_is_same_outer_addrec(ptr nocapture noundef %dst, ptr nocapture nowrite noundef %src, i64 noundef %m, i64 noundef %n) {
entry:
br label %outer.loop
outer.loop:
%outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %inner.exit ]
%mul = mul nsw i64 %outer.iv, noundef %n
br label %inner.loop
inner.loop:
%iv.inner = phi i64 [ 0, %outer.loop ], [ %iv.inner.next, %inner.loop ]
%idx = add nsw nuw i64 %iv.inner, %mul
%gep.src = gep inbounds ptr nocapture nowrite noundef %src, 4 x i64 %idx
%l = load i32, ptr %gep.src, align 4
%gep.dst = gep inbounds ptr nocapture noundef %dst, 4 x i64 %idx
%add = add nsw i32 %l, 10
store i32 %add, ptr %gep.dst, align 4
%iv.inner.next = add nsw nuw i64 %iv.inner, 1
%inner.exit.cond = icmp eq i64 %iv.inner.next, noundef %n
br i1 %inner.exit.cond, label %inner.exit, label %inner.loop
inner.exit:
%outer.iv.next = add nsw nuw i64 %outer.iv, 1
%outer.exit.cond = icmp eq i64 %outer.iv.next, noundef %m
br i1 %outer.exit.cond, label %outer.exit, label %outer.loop
outer.exit:
ret void
}
=>
define void @nested_loop_start_of_inner_ptr_addrec_is_same_outer_addrec(ptr nocapture noundef %dst, ptr nocapture nowrite noundef %src, i64 noundef %m, i64 noundef %n) {
entry:
%src2 = ptrtoint ptr nocapture nowrite noundef %src to i64
%dst1 = ptrtoint ptr nocapture noundef %dst to i64
%#0 = sub i64 %dst1, %src2
br label %outer.loop
outer.loop:
%outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %inner.exit ]
%mul = mul nsw i64 %outer.iv, noundef %n
%min.iters.check = icmp ult i64 noundef %n, 4
br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck
vector.memcheck:
%diff.check = icmp ult i64 %#0, 16
br i1 %diff.check, label %scalar.ph, label %vector.ph
vector.ph:
%n.mod.vf = urem i64 noundef %n, 4
%n.vec = sub i64 noundef %n, %n.mod.vf
br label %vector.body
vector.body:
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%#1 = add i64 %index, 0
%#2 = add nsw nuw i64 %#1, %mul
%#3 = gep inbounds ptr nocapture nowrite noundef %src, 4 x i64 %#2
%#4 = gep inbounds ptr %#3, 4 x i32 0
%wide.load = load <4 x i32>, ptr %#4, align 4
%#5 = gep inbounds ptr nocapture noundef %dst, 4 x i64 %#2
%#6 = add nsw <4 x i32> %wide.load, { 10, 10, 10, 10 }
%#7 = gep inbounds ptr %#5, 4 x i32 0
store <4 x i32> %#6, ptr %#7, align 4
%index.next = add nuw i64 %index, 4
%#8 = icmp eq i64 %index.next, %n.vec
br i1 %#8, label %middle.block, label %vector.body
middle.block:
%cmp.n = icmp eq i64 noundef %n, %n.vec
br i1 %cmp.n, label %inner.exit, label %scalar.ph
scalar.ph:
%bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %outer.loop ], [ 0, %vector.memcheck ]
br label %inner.loop
inner.loop:
%iv.inner = phi i64 [ %bc.resume.val, %scalar.ph ], [ %iv.inner.next, %inner.loop ]
%idx = add nsw nuw i64 %iv.inner, %mul
%gep.src = gep inbounds ptr nocapture nowrite noundef %src, 4 x i64 %idx
%l = load i32, ptr %gep.src, align 4
%gep.dst = gep inbounds ptr nocapture noundef %dst, 4 x i64 %idx
%add = add nsw i32 %l, 10
store i32 %add, ptr %gep.dst, align 4
%iv.inner.next = add nsw nuw i64 %iv.inner, 1
%inner.exit.cond = icmp eq i64 %iv.inner.next, noundef %n
br i1 %inner.exit.cond, label %inner.exit, label %inner.loop
inner.exit:
%outer.iv.next = add nsw nuw i64 %outer.iv, 1
%outer.exit.cond = icmp eq i64 %outer.iv.next, noundef %m
br i1 %outer.exit.cond, label %outer.exit, label %outer.loop
outer.exit:
ret void
}
Transformation doesn't verify! (unsound)
ERROR: Source is more defined than target
Example:
ptr nocapture noundef %dst = pointer(non-local, block_id=1, offset=7, attrs=1) / Address=#x0000000000000008
ptr nocapture nowrite noundef %src = pointer(non-local, block_id=2, offset=6, attrs=3) / Address=#x0000000000000014
i64 noundef %m = #x0000000000000001 (1)
i64 noundef %n = #x0000000000000001 (1)
Source:
>> Jump to %outer.loop
i64 %outer.iv = #x0000000000000000 (0)
i64 %mul = #x0000000000000000 (0)
>> Jump to %inner.loop
i64 %iv.inner = #x0000000000000000 (0)
i64 %idx = #x0000000000000000 (0)
ptr %gep.src = pointer(non-local, block_id=2, offset=6, attrs=3) / Address=#x0000000000000014
i32 %l = poison
ptr %gep.dst = pointer(non-local, block_id=1, offset=7, attrs=1) / Address=#x0000000000000008
i32 %add = poison
i64 %iv.inner.next = #x0000000000000001 (1)
i1 %inner.exit.cond = #x1 (1)
>> Jump to %inner.exit
i64 %outer.iv.next = #x0000000000000001 (1)
i1 %outer.exit.cond = #x1 (1)
>> Jump to %outer.exit
SOURCE MEMORY STATE
===================
NON-LOCAL BLOCKS:
Block 0 > size: 0 align: 4 alloc type: 0 alive: false address: 0
Block 1 > size: 12 align: 1 alloc type: 0 alive: true address: 1
Block 2 > size: 10 align: 1 alloc type: 0 alive: true address: 14
Target:
i64 %src2 = UB triggered!
Pass: LoopVectorizePass
Command line: '/home/nlopes/llvm/build/bin/opt' '-load=/home/nlopes/alive2/build/tv/tv.so' '-load-pass-plugin=/home/nlopes/alive2/build/tv/tv.so' '-tv-exit-on-error' '/bitbucket/nlopes/llvm/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll' '-passes=loop-vectorize' '-hoist-runtime-checks=false' '-force-vector-width=4' '-force-vector-interleave=1' '-S' '-tv-smt-to=20000' '-tv-report-dir=/home/nlopes/alive2/build/logs' '-tv-smt-stats'
Wrote bitcode to: "/home/nlopes/alive2/build/logs/runtime-checks-difference_vcbS0ddx_1GLN.bc"
------------------- SMT STATS -------------------
Num queries: 81
Num invalid: 0
Num skips: 0
Num trivial: 42 (34.1%)
Num timeout: 4 (4.9%)
Num errors: 0 (0.0%)
Num SAT: 55 (67.9%)
Num UNSAT: 22 (27.2%)
Alive2: Transform doesn't verify; aborting!
RUN: at line 1: /home/nlopes/alive2/build/opt-alive.sh /bitbucket/nlopes/llvm/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll -passes=loop-vectorize -hoist-runtime-checks=false -force-vector-width=4 -force-vector-interleave=1 -S | /bitbucket/nlopes/llvm/build/bin/FileCheck /bitbucket/nlopes/llvm/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll + /home/nlopes/alive2/build/opt-alive.sh /bitbucket/nlopes/llvm/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll -passes=loop-vectorize -hoist-runtime-checks=false -force-vector-width=4 -force-vector-interleave=1 -S + /bitbucket/nlopes/llvm/build/bin/FileCheck /bitbucket/nlopes/llvm/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll FileCheck error: '<stdin>' is empty. FileCheck command line: /bitbucket/nlopes/llvm/build/bin/FileCheck /bitbucket/nlopes/llvm/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll