Skip to content

Commit 25d2e78

Browse files
wenju-he and bader authored
[SYCLLowerIR][GlobalOffset] Avoid erasing live non-GEP/PHI defs (intel#20447)
Follow-up fix for 30e1d2f, in which collectGlobalOffsetUses walked general def-use chains, not just GEP/PHI. When removing collected pointer defs, only erase instructions that have no uses. Non-GEP/PHI intermediates may still have users. --------- Co-authored-by: Alexey Bader <alexey.bader@intel.com>
1 parent e9248dd commit 25d2e78

2 files changed

Lines changed: 56 additions & 4 deletions

File tree

llvm/lib/SYCLLowerIR/GlobalOffset.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ ModulePass *llvm::createGlobalOffsetPassLegacy() {
6060
return new GlobalOffsetLegacy();
6161
}
6262

63-
// Helper function to collect all GEPs, PHIs and Loads in post-order.
63+
// Helper function to collect all Uses of Load's pointer operand in post-order.
6464
static void collectGlobalOffsetUses(Function *ImplicitOffsetIntrinsic,
6565
SmallVectorImpl<Instruction *> &LoadPtrUses,
6666
SmallVectorImpl<Instruction *> &Loads) {
@@ -246,11 +246,14 @@ PreservedAnalyses GlobalOffsetPass::run(Module &M, ModuleAnalysisManager &) {
246246
L->eraseFromParent();
247247
}
248248

249-
// Remove all collected Loads and GEPs from the kernel.
249+
// Try to remove all collected Loads and their Defs from the kernel.
250250
// PtrUses is returned by `collectGlobalOffsetUses` in topological order.
251251
// Walk it backwards so we don't violate users.
252-
for (auto *I : reverse(PtrUses))
253-
I->eraseFromParent();
252+
for (auto *I : reverse(PtrUses)) {
253+
// A Def might not be a GEP. Remove it if it has no use.
254+
if (I->use_empty())
255+
I->eraseFromParent();
256+
}
254257

255258
// Remove all collected CallInsts from the kernel.
256259
for (auto *U : make_early_inc_range(ImplicitOffsetIntrinsic->users()))
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -bugpoint-enable-legacy-pm -globaloffset %s -S -o - | FileCheck %s
3+
4+
target datalayout = "e-p6:32:32-i64:64-i128:128-i256:256-v16:16-v32:32-n16:32:64"
5+
target triple = "nvptx64-nvidia-cuda"
6+
7+
declare ptr @llvm.nvvm.implicit.offset()
8+
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
9+
10+
define i64 @test_non_gep_phi_use(i32 %x) {
11+
; CHECK-LABEL: define i64 @test_non_gep_phi_use(
12+
; CHECK-SAME: i32 [[X:%.*]]) {
13+
; CHECK-NEXT: [[ENTRY:.*:]]
14+
; CHECK-NEXT: [[TRUNC:%.*]] = trunc nuw i32 [[X]] to i1
15+
; CHECK-NEXT: [[CTAID_X:%.*]] = tail call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
16+
; CHECK-NEXT: [[CTAID_Y:%.*]] = tail call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
17+
; CHECK-NEXT: [[CTAID_XY:%.*]] = select i1 [[TRUNC]], i32 [[CTAID_Y]], i32 [[CTAID_X]]
18+
; CHECK-NEXT: [[RES:%.*]] = zext i32 0 to i64
19+
; CHECK-NEXT: ret i64 [[RES]]
20+
;
21+
entry:
22+
%trunc = trunc nuw i32 %x to i1
23+
%ctaid_x = tail call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
24+
%ctaid_y = tail call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
25+
%ctaid_xy = select i1 %trunc, i32 %ctaid_y, i32 %ctaid_x
26+
%offset = tail call ptr @llvm.nvvm.implicit.offset()
27+
%idx = select i1 %trunc, i64 4, i64 0
28+
%gep = getelementptr inbounds nuw i8, ptr %offset, i64 %idx
29+
%load = load i32, ptr %gep, align 4
30+
%res = zext i32 %load to i64
31+
ret i64 %res
32+
}
33+
34+
; CHECK-LABEL: define i64 @test_non_gep_phi_use_with_offset(
35+
; CHECK-SAME: i32 [[X:%.*]], ptr [[PTR:%.*]]) {
36+
; CHECK-NEXT: [[ENTRY:.*:]]
37+
; CHECK-NEXT: [[TRUNC:%.*]] = trunc nuw i32 [[X]] to i1
38+
; CHECK-NEXT: [[CTAID_X:%.*]] = tail call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
39+
; CHECK-NEXT: [[CTAID_Y:%.*]] = tail call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
40+
; CHECK-NEXT: [[CTAID_XY:%.*]] = select i1 [[TRUNC]], i32 [[CTAID_Y]], i32 [[CTAID_X]]
41+
; CHECK-NEXT: [[IDX:%.*]] = select i1 [[TRUNC]], i64 4, i64 0
42+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 [[IDX]]
43+
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
44+
; CHECK-NEXT: [[RES:%.*]] = zext i32 [[LOAD]] to i64
45+
; CHECK-NEXT: ret i64 [[RES]]
46+
47+
!llvm.module.flags = !{!0}
48+
49+
!0 = !{i32 1, !"sycl-device", i32 1}

0 commit comments

Comments
 (0)