Test source: git
Source: <stdin> -- 1. ModuleToFunctionPassAdaptor ERROR: Unsupported attribute: noalias -- 1. PassManager<Function> : Skipping NOP ERROR: Unsupported attribute: noalias -- 2. LoopVectorizePass ERROR: Unsupported attribute: noalias -- 3. LoopVectorizePass ERROR: Unsupported attribute: noalias -- 4. PassManager<Function> : Skipping NOP ERROR: Unsupported attribute: noalias -- 5. PassManager<Function> : Skipping NOP -- 6. LoopVectorizePass ---------------------------------------- define void @cheap_icmp(ptr nocapture nowrite %pSrcA, ptr nocapture nowrite %pSrcB, ptr nocapture %pDst, i32 %blockSize) { %entry: %cmp.not8 = icmp eq i32 %blockSize, 0 br i1 %cmp.not8, label %while.end, label %while.body.preheader %while.body.preheader: br label %while.body %while.body: %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ] %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ nocapture nowrite %pSrcA, %while.body.preheader ] %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ nocapture %pDst, %while.body.preheader ] %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ nocapture nowrite %pSrcB, %while.body.preheader ] %incdec.ptr = gep inbounds ptr %pSrcA.addr.011, 1 x i32 1 %0 = load i8, ptr %pSrcA.addr.011, align 1 %conv1 = sext i8 %0 to i32 %incdec.ptr2 = gep inbounds ptr %pSrcB.addr.09, 1 x i32 1 %1 = load i8, ptr %pSrcB.addr.09, align 1 %conv3 = sext i8 %1 to i32 %mul = mul nsw i32 %conv3, %conv1 %shr = ashr i32 %mul, 7 %2 = icmp slt i32 %shr, 127 %spec.select.i = select i1 %2, i32 %shr, i32 127 %conv4 = trunc i32 %spec.select.i to i8 %incdec.ptr5 = gep inbounds ptr %pDst.addr.010, 1 x i32 1 store i8 %conv4, ptr %pDst.addr.010, align 1 %dec = add i32 %blkCnt.012, 4294967295 %cmp.not = icmp eq i32 %dec, 0 br i1 %cmp.not, label %while.end.loopexit, label %while.body %while.end.loopexit: br label %while.end %while.end: ret void } Transformation seems to be correct! (syntactically equal) -- 7. LoopVectorizePass ---------------------------------------- define void @cheap_icmp(ptr nocapture nowrite %pSrcA, ptr nocapture nowrite %pSrcB, ptr nocapture %pDst, i32 %blockSize) { %entry: %cmp.not8 = icmp eq i32 %blockSize, 0 br i1 %cmp.not8, label %while.end, label %while.body.preheader %while.body.preheader: br label %while.body %while.body: %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ] %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ nocapture nowrite %pSrcA, %while.body.preheader ] %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ nocapture %pDst, %while.body.preheader ] %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ nocapture nowrite %pSrcB, %while.body.preheader ] %incdec.ptr = gep inbounds ptr %pSrcA.addr.011, 1 x i32 1 %0 = load i8, ptr %pSrcA.addr.011, align 1 %conv1 = sext i8 %0 to i32 %incdec.ptr2 = gep inbounds ptr %pSrcB.addr.09, 1 x i32 1 %1 = load i8, ptr %pSrcB.addr.09, align 1 %conv3 = sext i8 %1 to i32 %mul = mul nsw i32 %conv3, %conv1 %shr = ashr i32 %mul, 7 %2 = icmp slt i32 %shr, 127 %spec.select.i = select i1 %2, i32 %shr, i32 127 %conv4 = trunc i32 %spec.select.i to i8 %incdec.ptr5 = gep inbounds ptr %pDst.addr.010, 1 x i32 1 store i8 %conv4, ptr %pDst.addr.010, align 1 %dec = add i32 %blkCnt.012, 4294967295 %cmp.not = icmp eq i32 %dec, 0 br i1 %cmp.not, label %while.end.loopexit, label %while.body %while.end.loopexit: br label %while.end %while.end: ret void } => define void @cheap_icmp(ptr nocapture nowrite %pSrcA, ptr nocapture nowrite %pSrcB, ptr nocapture %pDst, i32 %blockSize) { %entry: %pSrcB3 = ptrtoint ptr nocapture nowrite %pSrcB to i32 %pSrcA2 = ptrtoint ptr nocapture nowrite %pSrcA to i32 %pDst1 = ptrtoint ptr nocapture %pDst to i32 %cmp.not8 = icmp eq i32 %blockSize, 0 br i1 %cmp.not8, label %while.end, label %while.body.preheader %while.body.preheader: %min.iters.check = icmp ult i32 %blockSize, 16 br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck %vector.memcheck: %0 = sub i32 %pDst1, %pSrcA2 %diff.check = icmp ult i32 %0, 16 %1 = sub i32 %pDst1, %pSrcB3 %diff.check4 = icmp ult i32 %1, 16 %conflict.rdx = or i1 %diff.check, %diff.check4 br i1 %conflict.rdx, label %scalar.ph, label %vector.ph %vector.ph: %n.mod.vf = urem i32 %blockSize, 16 %n.vec = sub i32 %blockSize, %n.mod.vf %ind.end = sub i32 %blockSize, %n.vec %ind.end5 = gep ptr nocapture nowrite %pSrcA, 1 x i32 %n.vec %ind.end7 = gep ptr nocapture %pDst, 1 x i32 %n.vec %ind.end9 = gep ptr nocapture nowrite %pSrcB, 1 x i32 %n.vec br label %vector.body %vector.body: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %2 = add i32 %index, 0 %next.gep = gep ptr nocapture nowrite %pSrcA, 1 x i32 %2 %3 = add i32 %index, 0 %next.gep11 = gep ptr nocapture %pDst, 1 x i32 %3 %4 = add i32 %index, 0 %next.gep12 = gep ptr nocapture nowrite %pSrcB, 1 x i32 %4 %5 = gep ptr %next.gep, 1 x i32 0 %wide.load = load <16 x i8>, ptr %5, align 1 %6 = sext <16 x i8> %wide.load to <16 x i32> %7 = gep ptr %next.gep12, 1 x i32 0 %wide.load13 = load <16 x i8>, ptr %7, align 1 %8 = sext <16 x i8> %wide.load13 to <16 x i32> %9 = mul nsw <16 x i32> %8, %6 %10 = ashr <16 x i32> %9, { 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 } %11 = icmp slt <16 x i32> %10, { 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127 } %12 = select <16 x i1> %11, <16 x i32> %10, <16 x i32> { 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127 } %13 = trunc <16 x i32> %12 to <16 x i8> %14 = gep ptr %next.gep11, 1 x i32 0 store <16 x i8> %13, ptr %14, align 1 %index.next = add nuw i32 %index, 16 %15 = icmp eq i32 %index.next, %n.vec br i1 %15, label %middle.block, label %vector.body %middle.block: %cmp.n = icmp eq i32 %blockSize, %n.vec br i1 %cmp.n, label %while.end.loopexit, label %scalar.ph %scalar.ph: %bc.resume.val = phi i32 [ %ind.end, %middle.block ], [ %blockSize, %while.body.preheader ], [ %blockSize, %vector.memcheck ] %bc.resume.val6 = phi ptr [ %ind.end5, %middle.block ], [ nocapture nowrite %pSrcA, %while.body.preheader ], [ nocapture nowrite %pSrcA, %vector.memcheck ] %bc.resume.val8 = phi ptr [ %ind.end7, %middle.block ], [ nocapture %pDst, %while.body.preheader ], [ nocapture %pDst, %vector.memcheck ] %bc.resume.val10 = phi ptr [ %ind.end9, %middle.block ], [ nocapture nowrite %pSrcB, %while.body.preheader ], [ nocapture nowrite %pSrcB, %vector.memcheck ] br label %while.body %while.body: %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %bc.resume.val, %scalar.ph ] %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ %bc.resume.val6, %scalar.ph ] %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ %bc.resume.val8, %scalar.ph ] %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %bc.resume.val10, %scalar.ph ] %incdec.ptr = gep inbounds ptr %pSrcA.addr.011, 1 x i32 1 %16 = load i8, ptr %pSrcA.addr.011, align 1 %conv1 = sext i8 %16 to i32 %incdec.ptr2 = gep inbounds ptr %pSrcB.addr.09, 1 x i32 1 %17 = load i8, ptr %pSrcB.addr.09, align 1 %conv3 = sext i8 %17 to i32 %mul = mul nsw i32 %conv3, %conv1 %shr = ashr i32 %mul, 7 %18 = icmp slt i32 %shr, 127 %spec.select.i = select i1 %18, i32 %shr, i32 127 %conv4 = trunc i32 %spec.select.i to i8 %incdec.ptr5 = gep inbounds ptr %pDst.addr.010, 1 x i32 1 store i8 %conv4, ptr %pDst.addr.010, align 1 %dec = add i32 %blkCnt.012, 4294967295 %cmp.not = icmp eq i32 %dec, 0 br i1 %cmp.not, label %while.end.loopexit, label %while.body %while.end.loopexit: br label %while.end %while.end: ret void } Transformation doesn't verify! (unsound) ERROR: Source is more defined than target Example: ptr nocapture nowrite %pSrcA = poison ptr nocapture nowrite %pSrcB = poison ptr nocapture %pDst = poison i32 %blockSize = #x00000000 (0) Source: i1 %cmp.not8 = #x1 (1) >> Jump to %while.end SOURCE MEMORY STATE =================== NON-LOCAL BLOCKS: Block 0 > size: 0 align: 1 alloc type: 0 address: 0 Block 1 > size: 0 align: 8 alloc type: 0 address: 8 Block 2 > size: 0 align: 1 alloc type: 0 address: 8 Block 3 > size: 0 align: 1 alloc type: 0 address: 9 Target: i32 %pSrcB3 = UB triggered! Pass: LoopVectorizePass Command line: '/home/nlopes/llvm/build/bin/opt' '-load=/home/nlopes/alive2/build/tv/tv.so' '-load-pass-plugin=/home/nlopes/alive2/build/tv/tv.so' '-tv-exit-on-error' '-passes=loop-vectorize' '-debug-only=loop-vectorize' '-disable-output' '-tv-smt-to=20000' '-tv-report-dir=/home/nlopes/alive2/build/logs' '-tv-smt-stats' Wrote bitcode to: "/home/nlopes/alive2/build/logs/in_3EmhIMYi_DaxR.bc" ------------------- SMT STATS ------------------- Num queries: 29 Num invalid: 0 Num skips: 0 Num trivial: 2 (6.5%) Num timeout: 0 (0.0%) Num errors: 0 (0.0%) Num SAT: 27 (93.1%) Num UNSAT: 2 (6.9%) Alive2: Transform doesn't verify; aborting!
+ : 'RUN: at line 1' + /home/nlopes/alive2/build/opt-alive.sh -passes=loop-vectorize -debug-only=loop-vectorize -disable-output + /bitbucket/nlopes/llvm/build/bin/FileCheck /bitbucket/nlopes/llvm/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll /bitbucket/nlopes/llvm/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll:240:10: error: CHECK: expected string not found in input ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %cmp1 = fcmp ^ <stdin>:490:22: note: scanning from here LV: Selecting VF: 16. ^ <stdin>:530:1: note: possible intended match here LV: Found a vectorizable loop (16) in <stdin> ^ Input file: <stdin> Check file: /bitbucket/nlopes/llvm/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll -dump-input=help explains the following input dump. Input was: <<<<<< . . . 485: LV: Found an estimated cost of 2 for VF 16 For instruction: store i8 %conv4, ptr %pDst.addr.010, align 1 486: LV: Found an estimated cost of 1 for VF 16 For instruction: %dec = add i32 %blkCnt.012, -1 487: LV: Found an estimated cost of 1 for VF 16 For instruction: %cmp.not = icmp eq i32 %dec, 0 488: LV: Found an estimated cost of 0 for VF 16 For instruction: br i1 %cmp.not, label %while.end.loopexit, label %while.body 489: LV: Vector loop of width 16 costs: 3. 490: LV: Selecting VF: 16. check:240'0 X error: no match found 491: LV(REG): Calculating max register usage: check:240'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 492: LV(REG): At #0 Interval # 0 check:240'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 493: LV(REG): At #1 Interval # 1 check:240'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 494: LV(REG): At #2 Interval # 2 check:240'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 495: LV(REG): At #3 Interval # 3 check:240'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ . . . 525: 1 for %diff.check4 = icmp ult i32 %1, 16 check:240'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 526: 1 for %conflict.rdx = or i1 %diff.check, %diff.check4 check:240'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 527: Total cost of runtime checks: 5 check:240'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 528: LV: Minimum required TC for runtime checks to be profitable:16 check:240'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 529: LV: Interleaving is not beneficial. check:240'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 530: LV: Found a vectorizable loop (16) in <stdin> check:240'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ check:240'1 ? possible intended match 531: LEV: Epilogue vectorization is not profitable for this loop check:240'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 532: Executing best plan with VF=16, UF=1 check:240'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 533: LV: Interleaving disabled by the pass manager check:240'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 534: LV: Vectorizing: innermost loop. check:240'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 535: check:240'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ >>>>>>