Test Failure: Transforms/LoopVectorize/ARM/mve-icmpcost.ll

Test source: git

Log:

Source: <stdin>
-- 1. ModuleToFunctionPassAdaptor
ERROR: Unsupported attribute: noalias
-- 1. PassManager<Function> : Skipping NOP
ERROR: Unsupported attribute: noalias
-- 2. LoopVectorizePass
ERROR: Unsupported attribute: noalias
-- 3. LoopVectorizePass
ERROR: Unsupported attribute: noalias
-- 4. PassManager<Function> : Skipping NOP
ERROR: Unsupported attribute: noalias
-- 5. PassManager<Function> : Skipping NOP
-- 6. LoopVectorizePass

----------------------------------------
define void @cheap_icmp(ptr nocapture nowrite %pSrcA, ptr nocapture nowrite %pSrcB, ptr nocapture %pDst, i32 %blockSize) {
%entry:
  %cmp.not8 = icmp eq i32 %blockSize, 0
  br i1 %cmp.not8, label %while.end, label %while.body.preheader

%while.body.preheader:
  br label %while.body

%while.body:
  %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
  %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ nocapture nowrite %pSrcA, %while.body.preheader ]
  %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ nocapture %pDst, %while.body.preheader ]
  %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ nocapture nowrite %pSrcB, %while.body.preheader ]
  %incdec.ptr = gep inbounds ptr %pSrcA.addr.011, 1 x i32 1
  %0 = load i8, ptr %pSrcA.addr.011, align 1
  %conv1 = sext i8 %0 to i32
  %incdec.ptr2 = gep inbounds ptr %pSrcB.addr.09, 1 x i32 1
  %1 = load i8, ptr %pSrcB.addr.09, align 1
  %conv3 = sext i8 %1 to i32
  %mul = mul nsw i32 %conv3, %conv1
  %shr = ashr i32 %mul, 7
  %2 = icmp slt i32 %shr, 127
  %spec.select.i = select i1 %2, i32 %shr, i32 127
  %conv4 = trunc i32 %spec.select.i to i8
  %incdec.ptr5 = gep inbounds ptr %pDst.addr.010, 1 x i32 1
  store i8 %conv4, ptr %pDst.addr.010, align 1
  %dec = add i32 %blkCnt.012, 4294967295
  %cmp.not = icmp eq i32 %dec, 0
  br i1 %cmp.not, label %while.end.loopexit, label %while.body

%while.end.loopexit:
  br label %while.end

%while.end:
  ret void
}
Transformation seems to be correct! (syntactically equal)

-- 7. LoopVectorizePass

----------------------------------------
define void @cheap_icmp(ptr nocapture nowrite %pSrcA, ptr nocapture nowrite %pSrcB, ptr nocapture %pDst, i32 %blockSize) {
%entry:
  %cmp.not8 = icmp eq i32 %blockSize, 0
  br i1 %cmp.not8, label %while.end, label %while.body.preheader

%while.body.preheader:
  br label %while.body

%while.body:
  %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
  %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ nocapture nowrite %pSrcA, %while.body.preheader ]
  %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ nocapture %pDst, %while.body.preheader ]
  %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ nocapture nowrite %pSrcB, %while.body.preheader ]
  %incdec.ptr = gep inbounds ptr %pSrcA.addr.011, 1 x i32 1
  %0 = load i8, ptr %pSrcA.addr.011, align 1
  %conv1 = sext i8 %0 to i32
  %incdec.ptr2 = gep inbounds ptr %pSrcB.addr.09, 1 x i32 1
  %1 = load i8, ptr %pSrcB.addr.09, align 1
  %conv3 = sext i8 %1 to i32
  %mul = mul nsw i32 %conv3, %conv1
  %shr = ashr i32 %mul, 7
  %2 = icmp slt i32 %shr, 127
  %spec.select.i = select i1 %2, i32 %shr, i32 127
  %conv4 = trunc i32 %spec.select.i to i8
  %incdec.ptr5 = gep inbounds ptr %pDst.addr.010, 1 x i32 1
  store i8 %conv4, ptr %pDst.addr.010, align 1
  %dec = add i32 %blkCnt.012, 4294967295
  %cmp.not = icmp eq i32 %dec, 0
  br i1 %cmp.not, label %while.end.loopexit, label %while.body

%while.end.loopexit:
  br label %while.end

%while.end:
  ret void
}
=>
define void @cheap_icmp(ptr nocapture nowrite %pSrcA, ptr nocapture nowrite %pSrcB, ptr nocapture %pDst, i32 %blockSize) {
%entry:
  %pSrcB3 = ptrtoint ptr nocapture nowrite %pSrcB to i32
  %pSrcA2 = ptrtoint ptr nocapture nowrite %pSrcA to i32
  %pDst1 = ptrtoint ptr nocapture %pDst to i32
  %cmp.not8 = icmp eq i32 %blockSize, 0
  br i1 %cmp.not8, label %while.end, label %while.body.preheader

%while.body.preheader:
  %min.iters.check = icmp ult i32 %blockSize, 16
  br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck

%vector.memcheck:
  %0 = sub i32 %pDst1, %pSrcA2
  %diff.check = icmp ult i32 %0, 16
  %1 = sub i32 %pDst1, %pSrcB3
  %diff.check4 = icmp ult i32 %1, 16
  %conflict.rdx = or i1 %diff.check, %diff.check4
  br i1 %conflict.rdx, label %scalar.ph, label %vector.ph

%vector.ph:
  %n.mod.vf = urem i32 %blockSize, 16
  %n.vec = sub i32 %blockSize, %n.mod.vf
  %ind.end = sub i32 %blockSize, %n.vec
  %ind.end5 = gep ptr nocapture nowrite %pSrcA, 1 x i32 %n.vec
  %ind.end7 = gep ptr nocapture %pDst, 1 x i32 %n.vec
  %ind.end9 = gep ptr nocapture nowrite %pSrcB, 1 x i32 %n.vec
  br label %vector.body

%vector.body:
  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %2 = add i32 %index, 0
  %next.gep = gep ptr nocapture nowrite %pSrcA, 1 x i32 %2
  %3 = add i32 %index, 0
  %next.gep11 = gep ptr nocapture %pDst, 1 x i32 %3
  %4 = add i32 %index, 0
  %next.gep12 = gep ptr nocapture nowrite %pSrcB, 1 x i32 %4
  %5 = gep ptr %next.gep, 1 x i32 0
  %wide.load = load <16 x i8>, ptr %5, align 1
  %6 = sext <16 x i8> %wide.load to <16 x i32>
  %7 = gep ptr %next.gep12, 1 x i32 0
  %wide.load13 = load <16 x i8>, ptr %7, align 1
  %8 = sext <16 x i8> %wide.load13 to <16 x i32>
  %9 = mul nsw <16 x i32> %8, %6
  %10 = ashr <16 x i32> %9, { 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 }
  %11 = icmp slt <16 x i32> %10, { 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127 }
  %12 = select <16 x i1> %11, <16 x i32> %10, <16 x i32> { 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127 }
  %13 = trunc <16 x i32> %12 to <16 x i8>
  %14 = gep ptr %next.gep11, 1 x i32 0
  store <16 x i8> %13, ptr %14, align 1
  %index.next = add nuw i32 %index, 16
  %15 = icmp eq i32 %index.next, %n.vec
  br i1 %15, label %middle.block, label %vector.body

%middle.block:
  %cmp.n = icmp eq i32 %blockSize, %n.vec
  br i1 %cmp.n, label %while.end.loopexit, label %scalar.ph

%scalar.ph:
  %bc.resume.val = phi i32 [ %ind.end, %middle.block ], [ %blockSize, %while.body.preheader ], [ %blockSize, %vector.memcheck ]
  %bc.resume.val6 = phi ptr [ %ind.end5, %middle.block ], [ nocapture nowrite %pSrcA, %while.body.preheader ], [ nocapture nowrite %pSrcA, %vector.memcheck ]
  %bc.resume.val8 = phi ptr [ %ind.end7, %middle.block ], [ nocapture %pDst, %while.body.preheader ], [ nocapture %pDst, %vector.memcheck ]
  %bc.resume.val10 = phi ptr [ %ind.end9, %middle.block ], [ nocapture nowrite %pSrcB, %while.body.preheader ], [ nocapture nowrite %pSrcB, %vector.memcheck ]
  br label %while.body

%while.body:
  %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %bc.resume.val, %scalar.ph ]
  %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ %bc.resume.val6, %scalar.ph ]
  %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ %bc.resume.val8, %scalar.ph ]
  %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %bc.resume.val10, %scalar.ph ]
  %incdec.ptr = gep inbounds ptr %pSrcA.addr.011, 1 x i32 1
  %16 = load i8, ptr %pSrcA.addr.011, align 1
  %conv1 = sext i8 %16 to i32
  %incdec.ptr2 = gep inbounds ptr %pSrcB.addr.09, 1 x i32 1
  %17 = load i8, ptr %pSrcB.addr.09, align 1
  %conv3 = sext i8 %17 to i32
  %mul = mul nsw i32 %conv3, %conv1
  %shr = ashr i32 %mul, 7
  %18 = icmp slt i32 %shr, 127
  %spec.select.i = select i1 %18, i32 %shr, i32 127
  %conv4 = trunc i32 %spec.select.i to i8
  %incdec.ptr5 = gep inbounds ptr %pDst.addr.010, 1 x i32 1
  store i8 %conv4, ptr %pDst.addr.010, align 1
  %dec = add i32 %blkCnt.012, 4294967295
  %cmp.not = icmp eq i32 %dec, 0
  br i1 %cmp.not, label %while.end.loopexit, label %while.body

%while.end.loopexit:
  br label %while.end

%while.end:
  ret void
}
Transformation doesn't verify! (unsound)
ERROR: Source is more defined than target

Example:
ptr nocapture nowrite %pSrcA = pointer(non-local, block_id=1, offset=1073741822, attrs=3)
ptr nocapture nowrite %pSrcB = pointer(non-local, block_id=1, offset=1073741822, attrs=3)
ptr nocapture %pDst = pointer(non-local, block_id=1, offset=1073741822, attrs=1)
i32 %blockSize = #x00000001 (1)

Source:
i1 %cmp.not8 = #x0 (0)
  >> Jump to %while.body.preheader
  >> Jump to %while.body
i32 %blkCnt.012 = #x00000001 (1)
ptr %pSrcA.addr.011 = pointer(non-local, block_id=1, offset=1073741822, attrs=3)
ptr %pDst.addr.010 = pointer(non-local, block_id=1, offset=1073741822, attrs=1)
ptr %pSrcB.addr.09 = pointer(non-local, block_id=1, offset=1073741822, attrs=3)
ptr %incdec.ptr = pointer(non-local, block_id=1, offset=1073741823, attrs=3)
i8 %0 = poison
i32 %conv1 = poison
ptr %incdec.ptr2 = pointer(non-local, block_id=1, offset=1073741823, attrs=3)
i8 %1 = poison
i32 %conv3 = poison
i32 %mul = poison
i32 %shr = poison
i1 %2 = poison
i32 %spec.select.i = poison
i8 %conv4 = poison
ptr %incdec.ptr5 = pointer(non-local, block_id=1, offset=1073741823, attrs=1)
i32 %dec = #x00000000 (0)
i1 %cmp.not = #x1 (1)
  >> Jump to %while.end.loopexit
  >> Jump to %while.end

SOURCE MEMORY STATE
===================
NON-LOCAL BLOCKS:
Block 0 >	size: 0	align: 1	alloc type: 0	address: 0
Block 1 >	size: 1073741824	align: 4	alloc type: 0	address: 2281701377
Block 2 >	size: 1	align: 131072	alloc type: 0	address: 2281701376
Block 3 >	size: 3	align: 2	alloc type: 0	address: 134217728

Target:
i32 %pSrcB3 = UB triggered!


Pass: LoopVectorizePass
Command line: '/home/nlopes/llvm/build/bin/opt' '-load=/home/nlopes/alive2/build/tv/tv.so' '-load-pass-plugin=/home/nlopes/alive2/build/tv/tv.so' '-tv-exit-on-error' '-passes=loop-vectorize' '-debug-only=loop-vectorize' '-disable-output' '-tv-smt-to=20000' '-tv-report-dir=/home/nlopes/alive2/build/logs' '-tv-smt-stats'
Wrote bitcode to: "/home/nlopes/alive2/build/logs/in_qbK98ltA_6Sqp.bc"


------------------- SMT STATS -------------------
Num queries: 13
Num invalid: 0
Num skips:   0
Num trivial: 14 (51.9%)
Num timeout: 0 (0.0%)
Num errors:  0 (0.0%)
Num SAT:     13 (100.0%)
Num UNSAT:   0 (0.0%)
Alive2: Transform doesn't verify; aborting!

stderr:

+ : 'RUN: at line 1'
+ /home/nlopes/alive2/build/opt-alive.sh -passes=loop-vectorize -debug-only=loop-vectorize -disable-output
+ /bitbucket/nlopes/llvm/build/bin/FileCheck /bitbucket/nlopes/llvm/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll
/bitbucket/nlopes/llvm/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll:240:10: error: CHECK: expected string not found in input
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %cmp1 = fcmp
         ^
<stdin>:489:22: note: scanning from here
LV: Selecting VF: 16.
                     ^
<stdin>:529:1: note: possible intended match here
LV: Found a vectorizable loop (16) in <stdin>
^

Input file: <stdin>
Check file: /bitbucket/nlopes/llvm/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll

-dump-input=help explains the following input dump.

Input was:
<<<<<<
             .
             .
             .
           484: LV: Found an estimated cost of 2 for VF 16 For instruction: store i8 %conv4, ptr %pDst.addr.010, align 1 
           485: LV: Found an estimated cost of 1 for VF 16 For instruction: %dec = add i32 %blkCnt.012, -1 
           486: LV: Found an estimated cost of 1 for VF 16 For instruction: %cmp.not = icmp eq i32 %dec, 0 
           487: LV: Found an estimated cost of 0 for VF 16 For instruction: br i1 %cmp.not, label %while.end.loopexit, label %while.body 
           488: LV: Vector loop of width 16 costs: 3. 
           489: LV: Selecting VF: 16. 
check:240'0                          X error: no match found
           490: LV(REG): Calculating max register usage: 
check:240'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           491: LV(REG): At #0 Interval # 0 
check:240'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           492: LV(REG): At #1 Interval # 1 
check:240'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           493: LV(REG): At #2 Interval # 2 
check:240'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           494: LV(REG): At #3 Interval # 3 
check:240'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
             .
             .
             .
           524:  1 for %diff.check4 = icmp ult i32 %1, 16 
check:240'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           525:  1 for %conflict.rdx = or i1 %diff.check, %diff.check4 
check:240'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           526: Total cost of runtime checks: 5 
check:240'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           527: LV: Minimum required TC for runtime checks to be profitable:16 
check:240'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           528: LV: Interleaving is not beneficial. 
check:240'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           529: LV: Found a vectorizable loop (16) in <stdin> 
check:240'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
check:240'1     ?                                              possible intended match
           530: LEV: Epilogue vectorization is not profitable for this loop 
check:240'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           531: Executing best plan with VF=16, UF=1 
check:240'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           532: LV: Interleaving disabled by the pass manager 
check:240'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           533:  
check:240'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>>>>>>

 

<-- Back