Skip to content

[InstCombine] Support offsets in memset to load forwarding #151924

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions llvm/lib/Analysis/Loads.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -631,9 +631,13 @@ static Value *getAvailableLoadStore(Instruction *Inst, const Value *Ptr,
if (!Val || !Len)
return nullptr;

// TODO: Handle offsets.
Value *Dst = MSI->getDest();
if (!AreEquivalentAddressValues(Dst, Ptr))
// Handle offsets.
int64_t StoreOffset = 0, LoadOffset = 0;
const Value *StoreBase =
GetPointerBaseWithConstantOffset(MSI->getDest(), StoreOffset, DL);
const Value *LoadBase =
GetPointerBaseWithConstantOffset(Ptr, LoadOffset, DL);
if (StoreBase != LoadBase || LoadOffset < StoreOffset)
return nullptr;

if (IsLoadCSE)
Expand All @@ -645,7 +649,7 @@ static Value *getAvailableLoadStore(Instruction *Inst, const Value *Ptr,

// Make sure the read bytes are contained in the memset.
uint64_t LoadSize = LoadTypeSize.getFixedValue();
if ((Len->getValue() * 8).ult(LoadSize))
if ((Len->getValue() * 8).ult(LoadSize + (LoadOffset - StoreOffset) * 8))
return nullptr;

APInt Splat = LoadSize >= 8 ? APInt::getSplat(LoadSize, Val->getValue())
Expand Down
13 changes: 2 additions & 11 deletions llvm/test/Analysis/GlobalsModRef/memset-escape.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,14 @@ target triple = "x86_64-apple-macosx10.10.0"
@a = internal global [3 x i32] zeroinitializer, align 4
@b = common global i32 0, align 4

; The important thing we're checking for here is the reload of (some element of)
; @a after the memset.
; The important thing we're checking here is that the value from the memset
; rather than the preceding store is forwarded.

define i32 @main() {
; CHECK-LABEL: define noundef i32 @main(
; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: store i32 1, ptr getelementptr inbounds nuw (i8, ptr @a, i64 8), align 4
; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 dereferenceable(12) @a, i8 0, i64 12, i1 false)
; CHECK-NEXT: store i32 3, ptr @b, align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @a, i64 8), align 4
; CHECK-NEXT: [[CMP1_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT: br i1 [[CMP1_NOT]], label %[[IF_END:.*]], label %[[IF_THEN:.*]]
; CHECK: [[IF_THEN]]:
; CHECK-NEXT: tail call void @abort()
; CHECK-NEXT: unreachable
; CHECK: [[IF_END]]:
; CHECK-NEXT: ret i32 0
;
entry:
Expand Down
40 changes: 36 additions & 4 deletions llvm/test/Transforms/InstCombine/load-store-forward.ll
Original file line number Diff line number Diff line change
Expand Up @@ -365,20 +365,52 @@ define i32 @load_after_memset_unknown(ptr %a, i8 %byte) {
ret i32 %v
}

; TODO: Handle load at offset.
; Loads an i32 through a GEP at constant byte offset 4 from %a, directly after
; a 16-byte memset of %a with the value 0. The four loaded bytes (offsets 4..7)
; lie inside the memset range.
; NOTE(review): the FileCheck lines below contain both a retained gep/load
; sequence and a folded `ret i32 0`; this looks like pre- and post-change
; expectations merged from a diff view -- confirm against the real test file.
define i32 @load_after_memset_0_offset(ptr %a) {
; CHECK-LABEL: @load_after_memset_0_offset(
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 4
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT: ret i32 [[V]]
; CHECK-NEXT: ret i32 0
;
call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
%gep = getelementptr i8, ptr %a, i64 4
%v = load i32, ptr %gep
ret i32 %v
}

; Loads an i32 through a GEP at constant byte offset 4 from %a, directly after
; a 16-byte memset of %a with the byte value 1. The expected output keeps the
; memset and returns the constant 16843009 (0x01010101, i.e. the byte 1
; repeated four times) instead of performing the load.
define i32 @load_after_memset_1_offset(ptr %a) {
; CHECK-LABEL: @load_after_memset_1_offset(
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false)
; CHECK-NEXT: ret i32 16843009
;
call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false)
%gep = getelementptr i8, ptr %a, i64 4
%v = load i32, ptr %gep
ret i32 %v
}

; Loads an i1 (a type narrower than one byte) through a GEP with i1 element
; type at index 12 from %a, directly after a 16-byte memset of %a with the
; value 0. The expected output keeps the memset and returns `false` without a
; load.
; NOTE(review): index 12 of i1 is presumably byte offset 12, which would place
; the load inside the 16-byte memset range -- confirm against the data layout.
define i1 @load_after_memset_0_offset_i1(ptr %a) {
; CHECK-LABEL: @load_after_memset_0_offset_i1(
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT: ret i1 false
;
call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
%gep = getelementptr i1, ptr %a, i64 12
%v = load i1, ptr %gep
ret i1 %v
}

; Negative test: the memset destination is %a + 2 bytes, while the i8 load
; reads from %a itself, so the loaded byte sits before the start of the memset
; range. The expected output keeps the gep, the memset and the load unchanged;
; no value is forwarded from the memset.
define i8 @neg_load_after_memset_0_neg_offset(ptr %a) {
; CHECK-LABEL: @neg_load_after_memset_0_neg_offset(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 2
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[GEP]], i8 0, i64 16, i1 false)
; CHECK-NEXT: [[V:%.*]] = load i8, ptr [[A]], align 1
; CHECK-NEXT: ret i8 [[V]]
;
%gep = getelementptr i8, ptr %a, i64 2
call void @llvm.memset.p0.i64(ptr %gep, i8 0, i64 16, i1 false)
%v = load i8, ptr %a
ret i8 %v
}

define i32 @load_after_memset_0_offset_too_large(ptr %a) {
; CHECK-LABEL: @load_after_memset_0_offset_too_large(
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
Expand Down