Test source: git
Source: <stdin> ---------------------------------------- define void @recurrence_1(ptr nocapture nowrite %a, ptr nocapture %b, i32 %n) { %entry: br label %for.preheader %for.preheader: %arrayidx.phi.trans.insert = gep inbounds ptr nocapture nowrite %a, 4 x i64 0 %pre_load = load i32, ptr %arrayidx.phi.trans.insert, align 4 br label %scalar.body %scalar.body: %0 = phi i32 [ %pre_load, %for.preheader ], [ %1, %scalar.body ] %indvars.iv = phi i64 [ 0, %for.preheader ], [ %indvars.iv.next, %scalar.body ] %indvars.iv.next = add nsw nuw i64 %indvars.iv, 1 %arrayidx32 = gep inbounds ptr nocapture nowrite %a, 4 x i64 %indvars.iv.next %1 = load i32, ptr %arrayidx32, align 4 %arrayidx34 = gep inbounds ptr nocapture %b, 4 x i64 %indvars.iv %add35 = add i32 %1, %0 store i32 %add35, ptr %arrayidx34, align 4 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n br i1 %exitcond, label %for.exit, label %scalar.body %for.exit: ret void } => define void @recurrence_1(ptr nocapture nowrite %a, ptr nocapture %b, i32 %n) { %entry: %a2 = ptrtoint ptr nocapture nowrite %a to i64 %b1 = ptrtoint ptr nocapture %b to i64 br label %for.preheader %for.preheader: %pre_load = load i32, ptr nocapture nowrite %a, align 4 %0 = add i32 %n, 4294967295 %1 = zext i32 %0 to i64 %2 = add nsw nuw i64 %1, 1 %min.iters.check = icmp ult i32 %0, 3 br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck %vector.memcheck: %3 = add i64 %a2, 4 %4 = sub i64 %b1, %3 %diff.check = icmp ult i64 %4, 16 br i1 %diff.check, label %scalar.ph, label %vector.ph %vector.ph: %n.vec = and i64 %2, 8589934588 %vector.recur.init = insertelement <4 x i32> poison, i32 %pre_load, i64 3 br label %vector.body %vector.body: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vector.recur = phi <4 x i32> [ %vector.recur.init, %vector.ph ], [ %wide.load, %vector.body ] %5 = or i64 %index, 1 %6 = gep inbounds ptr nocapture nowrite %a, 4 x i64 %5 %7 = bitcast ptr %6 to ptr %wide.load = load <4 x i32>, ptr %7, align 4 %8 = shufflevector <4 x i32> %vector.recur, <4 x i32> %wide.load, 3, 4, 5, 6 %9 = gep inbounds ptr nocapture %b, 4 x i64 %index %10 = add <4 x i32> %wide.load, %8 %11 = bitcast ptr %9 to ptr store <4 x i32> %10, ptr %11, align 4 %index.next = add nuw i64 %index, 4 %12 = icmp eq i64 %index.next, %n.vec br i1 %12, label %middle.block, label %vector.body %middle.block: %cmp.n = icmp eq i64 %2, %n.vec %vector.recur.extract = extractelement <4 x i32> %wide.load, i64 3 br i1 %cmp.n, label %for.exit, label %scalar.ph %scalar.ph: %scalar.recur.init = phi i32 [ %pre_load, %vector.memcheck ], [ %pre_load, %for.preheader ], [ %vector.recur.extract, %middle.block ] %bc.resume.val = phi i64 [ 0, %vector.memcheck ], [ 0, %for.preheader ], [ %n.vec, %middle.block ] br label %scalar.body %scalar.body: %scalar.recur = phi i32 [ %scalar.recur.init, %scalar.ph ], [ %13, %scalar.body ] %indvars.iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %indvars.iv.next, %scalar.body ] %indvars.iv.next = add nsw nuw i64 %indvars.iv, 1 %arrayidx32 = gep inbounds ptr nocapture nowrite %a, 4 x i64 %indvars.iv.next %13 = load i32, ptr %arrayidx32, align 4 %arrayidx34 = gep inbounds ptr nocapture %b, 4 x i64 %indvars.iv %add35 = add i32 %13, %scalar.recur store i32 %add35, ptr %arrayidx34, align 4 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n br i1 %exitcond, label %for.exit, label %scalar.body %for.exit: ret void } Transformation doesn't verify! ERROR: Source is more defined than target Example: ptr nocapture nowrite %a = pointer(non-local, block_id=1, offset=4611686018427387883, attrs=3) ptr nocapture %b = pointer(non-local, block_id=1, offset=4611686018427387895, attrs=1) i32 %n = #x00000001 (1) Source: >> Jump to %for.preheader ptr %arrayidx.phi.trans.insert = pointer(non-local, block_id=1, offset=4611686018427387883, attrs=3) i32 %pre_load = poison >> Jump to %scalar.body i32 %0 = poison i64 %indvars.iv = #x0000000000000000 (0) i64 %indvars.iv.next = #x0000000000000001 (1) ptr %arrayidx32 = pointer(non-local, block_id=1, offset=4611686018427387887, attrs=3) i32 %1 = poison ptr %arrayidx34 = pointer(non-local, block_id=1, offset=4611686018427387895, attrs=1) i32 %add35 = poison i32 %lftr.wideiv = #x00000001 (1) i1 %exitcond = #x1 (1) >> Jump to %for.exit SOURCE MEMORY STATE =================== NON-LOCAL BLOCKS: Block 0 > size: 0 align: 4 alloc type: 0 address: 0 Block 1 > size: 4611686018427387904 align: 4 alloc type: 0 address: 1 Block 2 > size: 102760450 align: 2 alloc type: 0 address: 9223372036855300096 Target: i64 %a2 = UB triggered! ------------------- SMT STATS ------------------- Num queries: 8 Num invalid: 0 Num skips: 0 Num trivial: 10 (55.6%) Num timeout: 0 (0.0%) Num errors: 0 (0.0%) Num SAT: 8 (100.0%) Num UNSAT: 0 (0.0%) Alive2: Transform doesn't verify; aborting!
+ : 'RUN: at line 2' + /home/nlopes/alive2/build/opt-alive.sh -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine -S + /tmp/nlopes/llvm/build/bin/FileCheck /tmp/nlopes/llvm/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll remark: <unknown>:0:0: loop not vectorized: value that could not be identified as reduction is used outside the loop remark: <unknown>:0:0: loop not vectorized: value that could not be identified as reduction is used outside the loop remark: <unknown>:0:0: loop not vectorized: value that could not be identified as reduction is used outside the loop FileCheck error: '<stdin>' is empty. FileCheck command line: /tmp/nlopes/llvm/build/bin/FileCheck /tmp/nlopes/llvm/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll