Test Failure: Transforms/LoopVectorize/first-order-recurrence.ll

Test source: git

Comments: Bug in IR semantics or optimization: escapes noescape pointer

Log:

Source: <stdin>

----------------------------------------
define void @recurrence_1(ptr nocapture nowrite %a, ptr nocapture %b, i32 %n) {
%entry:
  br label %for.preheader

%for.preheader:
  %arrayidx.phi.trans.insert = gep inbounds ptr nocapture nowrite %a, 4 x i64 0
  %pre_load = load i32, ptr %arrayidx.phi.trans.insert, align 4
  br label %scalar.body

%scalar.body:
  %0 = phi i32 [ %pre_load, %for.preheader ], [ %1, %scalar.body ]
  %indvars.iv = phi i64 [ 0, %for.preheader ], [ %indvars.iv.next, %scalar.body ]
  %indvars.iv.next = add nsw nuw i64 %indvars.iv, 1
  %arrayidx32 = gep inbounds ptr nocapture nowrite %a, 4 x i64 %indvars.iv.next
  %1 = load i32, ptr %arrayidx32, align 4
  %arrayidx34 = gep inbounds ptr nocapture %b, 4 x i64 %indvars.iv
  %add35 = add i32 %1, %0
  store i32 %add35, ptr %arrayidx34, align 4
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.exit, label %scalar.body

%for.exit:
  ret void
}
=>
define void @recurrence_1(ptr nocapture nowrite %a, ptr nocapture %b, i32 %n) {
%entry:
  %a2 = ptrtoint ptr nocapture nowrite %a to i64
  %b1 = ptrtoint ptr nocapture %b to i64
  br label %for.preheader

%for.preheader:
  %pre_load = load i32, ptr nocapture nowrite %a, align 4
  %0 = add i32 %n, 4294967295
  %1 = zext i32 %0 to i64
  %2 = add nsw nuw i64 %1, 1
  %min.iters.check = icmp ult i32 %0, 3
  br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck

%vector.memcheck:
  %3 = add i64 %a2, 4
  %4 = sub i64 %b1, %3
  %diff.check = icmp ult i64 %4, 16
  br i1 %diff.check, label %scalar.ph, label %vector.ph

%vector.ph:
  %n.vec = and i64 %2, 8589934588
  %vector.recur.init = insertelement <4 x i32> poison, i32 %pre_load, i64 3
  br label %vector.body

%vector.body:
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vector.recur = phi <4 x i32> [ %vector.recur.init, %vector.ph ], [ %wide.load, %vector.body ]
  %5 = or i64 %index, 1
  %6 = gep inbounds ptr nocapture nowrite %a, 4 x i64 %5
  %7 = bitcast ptr %6 to ptr
  %wide.load = load <4 x i32>, ptr %7, align 4
  %8 = shufflevector <4 x i32> %vector.recur, <4 x i32> %wide.load, 3, 4, 5, 6
  %9 = gep inbounds ptr nocapture %b, 4 x i64 %index
  %10 = add <4 x i32> %wide.load, %8
  %11 = bitcast ptr %9 to ptr
  store <4 x i32> %10, ptr %11, align 4
  %index.next = add nuw i64 %index, 4
  %12 = icmp eq i64 %index.next, %n.vec
  br i1 %12, label %middle.block, label %vector.body

%middle.block:
  %cmp.n = icmp eq i64 %2, %n.vec
  %vector.recur.extract = extractelement <4 x i32> %wide.load, i64 3
  br i1 %cmp.n, label %for.exit, label %scalar.ph

%scalar.ph:
  %scalar.recur.init = phi i32 [ %pre_load, %vector.memcheck ], [ %pre_load, %for.preheader ], [ %vector.recur.extract, %middle.block ]
  %bc.resume.val = phi i64 [ 0, %vector.memcheck ], [ 0, %for.preheader ], [ %n.vec, %middle.block ]
  br label %scalar.body

%scalar.body:
  %scalar.recur = phi i32 [ %scalar.recur.init, %scalar.ph ], [ %13, %scalar.body ]
  %indvars.iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %indvars.iv.next, %scalar.body ]
  %indvars.iv.next = add nsw nuw i64 %indvars.iv, 1
  %arrayidx32 = gep inbounds ptr nocapture nowrite %a, 4 x i64 %indvars.iv.next
  %13 = load i32, ptr %arrayidx32, align 4
  %arrayidx34 = gep inbounds ptr nocapture %b, 4 x i64 %indvars.iv
  %add35 = add i32 %13, %scalar.recur
  store i32 %add35, ptr %arrayidx34, align 4
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.exit, label %scalar.body

%for.exit:
  ret void
}
Transformation doesn't verify!
ERROR: Source is more defined than target

Example:
ptr nocapture nowrite %a = pointer(non-local, block_id=1, offset=41, attrs=3)
ptr nocapture %b = pointer(non-local, block_id=2, offset=4611686018427387895, attrs=1)
i32 %n = #x00000001 (1)

Source:
  >> Jump to %for.preheader
ptr %arrayidx.phi.trans.insert = pointer(non-local, block_id=1, offset=41, attrs=3)
i32 %pre_load = poison
  >> Jump to %scalar.body
i32 %0 = poison
i64 %indvars.iv = #x0000000000000000 (0)
i64 %indvars.iv.next = #x0000000000000001 (1)
ptr %arrayidx32 = pointer(non-local, block_id=1, offset=45, attrs=3)
i32 %1 = poison
ptr %arrayidx34 = pointer(non-local, block_id=2, offset=4611686018427387895, attrs=1)
i32 %add35 = poison
i32 %lftr.wideiv = #x00000001 (1)
i1 %exitcond = #x1 (1)
  >> Jump to %for.exit

SOURCE MEMORY STATE
===================
NON-LOCAL BLOCKS:
Block 0 >	size: 0	align: 4	alloc type: 0	address: 0
Block 1 >	size: 8209	align: 4	alloc type: 0	address: 221187
Block 2 >	size: 4611755713667006468	align: 2	alloc type: 0	address: 71042248945677

Target:
i64 %a2 = UB triggered!



------------------- SMT STATS -------------------
Num queries: 8
Num invalid: 0
Num skips:   0
Num trivial: 10 (55.6%)
Num timeout: 0 (0.0%)
Num errors:  0 (0.0%)
Num SAT:     8 (100.0%)
Num UNSAT:   0 (0.0%)
Alive2: Transform doesn't verify; aborting!

stderr:

+ : 'RUN: at line 2'
+ /home/nlopes/alive2/build/opt-alive.sh -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine -S
+ /home/nlopes/llvm/build/bin/FileCheck /home/nlopes/llvm/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
remark: <unknown>:0:0: loop not vectorized: value that could not be identified as reduction is used outside the loop
remark: <unknown>:0:0: loop not vectorized: value that could not be identified as reduction is used outside the loop
remark: <unknown>:0:0: loop not vectorized: value that could not be identified as reduction is used outside the loop

FileCheck error: '<stdin>' is empty.
FileCheck command line:  /home/nlopes/llvm/build/bin/FileCheck /home/nlopes/llvm/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll

 

<-- Back