; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=memcpyopt < %s -verify-memoryssa | FileCheck %s

; %dest is stored to (value 1) and then loaded between the memset that fills
; %src and the memcpy from %src into %dest.
; Expected output: the memset of %src stays where it is, the load of %dest
; stays ahead of any zeroing of %dest, and the memcpy is only rewritten in
; place as a memset of %dest. The zeroing must not be hoisted above the load.
define i8 @read_dest_between_call_and_memcpy() {
; CHECK-LABEL: @read_dest_between_call_and_memcpy(
; CHECK-NEXT:    [[DEST:%.*]] = alloca [16 x i8], align 1
; CHECK-NEXT:    [[SRC:%.*]] = alloca [16 x i8], align 1
; CHECK-NEXT:    store i8 1, ptr [[DEST]], align 1
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[SRC]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    [[X:%.*]] = load i8, ptr [[DEST]], align 1
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[DEST]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    ret i8 [[X]]
;
  %dest = alloca [16 x i8]
  %src = alloca [16 x i8]
  store i8 1, ptr %dest
  call void @llvm.memset.p0.i64(ptr %src, i8 0, i64 16, i1 false)
  %x = load i8, ptr %dest
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 16, i1 false)
  ret i8 %x
}

; %src is loaded between the memset that fills it and the memcpy from %src
; into %dest.
; Expected output: the memset of %src and the load from %src are unchanged,
; and the memcpy is rewritten in place as a memset of %dest, so the load still
; reads from the initialized %src buffer.
define i8 @read_src_between_call_and_memcpy() {
; CHECK-LABEL: @read_src_between_call_and_memcpy(
; CHECK-NEXT:    [[DEST:%.*]] = alloca [16 x i8], align 1
; CHECK-NEXT:    [[SRC:%.*]] = alloca [16 x i8], align 1
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[SRC]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    [[X:%.*]] = load i8, ptr [[SRC]], align 1
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[DEST]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    ret i8 [[X]]
;
  %dest = alloca [16 x i8]
  %src = alloca [16 x i8]
  call void @llvm.memset.p0.i64(ptr %src, i8 0, i64 16, i1 false)
  %x = load i8, ptr %src
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 16, i1 false)
  ret i8 %x
}

; A store to %dest sits between the memset that fills %src and the memcpy from
; %src into %dest.
; Expected output: the memset of %src and the intervening store to %dest are
; kept in their original order, and the memcpy is rewritten in place as a
; memset of %dest that still executes after the store.
define void @write_dest_between_call_and_memcpy() {
; CHECK-LABEL: @write_dest_between_call_and_memcpy(
; CHECK-NEXT:    [[DEST:%.*]] = alloca [16 x i8], align 1
; CHECK-NEXT:    [[SRC:%.*]] = alloca [16 x i8], align 1
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[SRC]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    store i8 1, ptr [[DEST]], align 1
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[DEST]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    ret void
;
  %dest = alloca [16 x i8]
  %src = alloca [16 x i8]
  call void @llvm.memset.p0.i64(ptr %src, i8 0, i64 16, i1 false)
  store i8 1, ptr %dest
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 16, i1 false)
  ret void
}

; A store to %src sits between the memset that fills %src and the memcpy from
; %src into %dest, so %src is no longer all zeros when it is copied.
; Expected output: only the %src alloca, its memset and the store remain; the
; %dest alloca and the memcpy are gone, and no memset of %dest is introduced.
; NOTE(review): %dest has no user other than the memcpy; presumably the two
; allocas are merged into one rather than the copy being forwarded from the
; memset - confirm against the pass implementation.
define void @write_src_between_call_and_memcpy() {
; CHECK-LABEL: @write_src_between_call_and_memcpy(
; CHECK-NEXT:    [[SRC:%.*]] = alloca [16 x i8], align 1
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[SRC]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    store i8 1, ptr [[SRC]], align 1
; CHECK-NEXT:    ret void
;
  %dest = alloca [16 x i8]
  %src = alloca [16 x i8]
  call void @llvm.memset.p0.i64(ptr %src, i8 0, i64 16, i1 false)
  store i8 1, ptr %src
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 16, i1 false)
  ret void
}

; A readnone call to @may_throw (not marked nounwind) sits between the memset
; of the local %src and the memcpy into the caller-provided %dest.i8.
; Expected output: the memset of %src stays before the call and the memcpy is
; rewritten in place as a memset of %dest.i8 after the call, so nothing is
; written to the caller-visible destination before the call executes.
define void @throw_between_call_and_mempy(ptr writable dereferenceable(16) %dest.i8) {
; CHECK-LABEL: @throw_between_call_and_mempy(
; CHECK-NEXT:    [[SRC:%.*]] = alloca [16 x i8], align 1
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[SRC]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    call void @may_throw() #[[ATTR2:[0-9]+]]
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[DEST_I8:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    ret void
;
  %src = alloca [16 x i8]
  call void @llvm.memset.p0.i64(ptr %src, i8 0, i64 16, i1 false)
  call void @may_throw() readnone
  call void @llvm.memcpy.p0.p0.i64(ptr %dest.i8, ptr %src, i64 16, i1 false)
  ret void
}

; The memcpy destination is a GEP at byte offset 8 into the 16-byte %dest
; alloca, and the 8-byte %src is passed to a nounwind call to @accept_ptr
; right before the copy. The GEP is already defined ahead of the call.
; Expected output: the call receives the GEP into %dest directly and the
; memcpy is removed.
define void @dest_is_gep_nounwind_call() {
; CHECK-LABEL: @dest_is_gep_nounwind_call(
; CHECK-NEXT:    [[DEST:%.*]] = alloca [16 x i8], align 1
; CHECK-NEXT:    [[SRC:%.*]] = alloca [8 x i8], align 1
; CHECK-NEXT:    [[DEST_I8:%.*]] = getelementptr [16 x i8], ptr [[DEST]], i64 0, i64 8
; CHECK-NEXT:    call void @accept_ptr(ptr [[DEST_I8]]) #[[ATTR3:[0-9]+]]
; CHECK-NEXT:    ret void
;
  %dest = alloca [16 x i8]
  %src = alloca [8 x i8]
  %dest.i8 = getelementptr [16 x i8], ptr %dest, i64 0, i64 8
  call void @accept_ptr(ptr %src) nounwind
  call void @llvm.memcpy.p0.p0.i64(ptr %dest.i8, ptr %src, i64 8, i1 false)
  ret void
}

; Same shape as @dest_is_gep_nounwind_call, except the call to @accept_ptr
; carries no nounwind attribute. The destination is still a GEP into a local
; alloca.
; Expected output: the call receives the GEP into %dest directly and the
; memcpy is removed, i.e. the missing nounwind does not block the rewrite here.
define void @dest_is_gep_may_throw_call() {
; CHECK-LABEL: @dest_is_gep_may_throw_call(
; CHECK-NEXT:    [[DEST:%.*]] = alloca [16 x i8], align 1
; CHECK-NEXT:    [[SRC:%.*]] = alloca [8 x i8], align 1
; CHECK-NEXT:    [[DEST_I8:%.*]] = getelementptr [16 x i8], ptr [[DEST]], i64 0, i64 8
; CHECK-NEXT:    call void @accept_ptr(ptr [[DEST_I8]])
; CHECK-NEXT:    ret void
;
  %dest = alloca [16 x i8]
  %src = alloca [8 x i8]
  %dest.i8 = getelementptr [16 x i8], ptr %dest, i64 0, i64 8
  call void @accept_ptr(ptr %src)
  call void @llvm.memcpy.p0.p0.i64(ptr %dest.i8, ptr %src, i64 8, i1 false)
  ret void
}

; Here the GEP that computes the memcpy destination is defined after the call
; to @accept_ptr, so it is not available at the call site in the input.
; Expected output: the GEP appears before the call, the call receives it as
; its argument, and the memcpy is removed.
define void @dest_is_gep_requires_movement() {
; CHECK-LABEL: @dest_is_gep_requires_movement(
; CHECK-NEXT:    [[DEST:%.*]] = alloca [16 x i8], align 1
; CHECK-NEXT:    [[SRC:%.*]] = alloca [8 x i8], align 1
; CHECK-NEXT:    [[DEST_I8:%.*]] = getelementptr [16 x i8], ptr [[DEST]], i64 0, i64 8
; CHECK-NEXT:    call void @accept_ptr(ptr [[DEST_I8]]) #[[ATTR3]]
; CHECK-NEXT:    ret void
;
  %dest = alloca [16 x i8]
  %src = alloca [8 x i8]
  call void @accept_ptr(ptr %src) nounwind
  %dest.i8 = getelementptr [16 x i8], ptr %dest, i64 0, i64 8
  call void @llvm.memcpy.p0.p0.i64(ptr %dest.i8, ptr %src, i64 8, i1 false)
  ret void
}

; %dest is passed to @accept_ptr without nocapture (so it may be captured)
; before the call that fills %src. That second call is argmemonly and takes
; %src as a nocapture argument.
; Expected output: the second call operates on %dest directly and the memcpy
; is removed; the first call is unchanged. The input's nocapture is printed as
; captures(none) in the output.
define void @capture_before_call_argmemonly() {
; CHECK-LABEL: @capture_before_call_argmemonly(
; CHECK-NEXT:    [[DEST:%.*]] = alloca [16 x i8], align 1
; CHECK-NEXT:    [[SRC:%.*]] = alloca [16 x i8], align 1
; CHECK-NEXT:    call void @accept_ptr(ptr [[DEST]])
; CHECK-NEXT:    call void @accept_ptr(ptr captures(none) [[DEST]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT:    ret void
;
  %dest = alloca [16 x i8]
  %src = alloca [16 x i8]
  call void @accept_ptr(ptr %dest) ; capture
  call void @accept_ptr(ptr nocapture %src) argmemonly
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 16, i1 false)
  ret void
}

; Variant of @capture_before_call_argmemonly in which the call that fills %src
; is additionally marked nounwind. %dest may already have been captured by the
; first call.
; Expected output: the second call operates on %dest directly and the memcpy
; is removed.
define void @capture_before_call_argmemonly_nounwind() {
; CHECK-LABEL: @capture_before_call_argmemonly_nounwind(
; CHECK-NEXT:    [[DEST:%.*]] = alloca [16 x i8], align 1
; CHECK-NEXT:    [[SRC:%.*]] = alloca [16 x i8], align 1
; CHECK-NEXT:    call void @accept_ptr(ptr [[DEST]])
; CHECK-NEXT:    call void @accept_ptr(ptr captures(none) [[DEST]]) #[[ATTR5:[0-9]+]]
; CHECK-NEXT:    ret void
;
  %dest = alloca [16 x i8]
  %src = alloca [16 x i8]
  call void @accept_ptr(ptr %dest) ; capture
  ; NB: argmemonly currently implies willreturn.
  call void @accept_ptr(ptr nocapture %src) argmemonly nounwind
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 16, i1 false)
  ret void
}

; Variant of @capture_before_call_argmemonly in which the call that fills %src
; is marked argmemonly, nounwind and willreturn explicitly. %dest may already
; have been captured by the first call.
; Expected output: the second call operates on %dest directly and the memcpy
; is removed.
define void @capture_before_call_argmemonly_nounwind_willreturn() {
; CHECK-LABEL: @capture_before_call_argmemonly_nounwind_willreturn(
; CHECK-NEXT:    [[DEST:%.*]] = alloca [16 x i8], align 1
; CHECK-NEXT:    [[SRC:%.*]] = alloca [16 x i8], align 1
; CHECK-NEXT:    call void @accept_ptr(ptr [[DEST]])
; CHECK-NEXT:    call void @accept_ptr(ptr captures(none) [[DEST]]) #[[ATTR6:[0-9]+]]
; CHECK-NEXT:    ret void
;
  %dest = alloca [16 x i8]
  %src = alloca [16 x i8]
  call void @accept_ptr(ptr %dest) ; capture
  call void @accept_ptr(ptr nocapture %src) argmemonly nounwind willreturn
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 16, i1 false)
  ret void
}

; There is no path from the capture back to the memcpy.
; So we are allowed to perform the call slot optimization.
; The capturing call lives in the 'captures' block, which returns immediately;
; the call that fills %src and the memcpy live in the sibling 'nocaptures'
; block. Expected output: in 'nocaptures' the call operates on %dest directly
; and the memcpy is removed, while 'captures' is left untouched.
define void @capture_nopath_call(i1 %cond) {
; CHECK-LABEL: @capture_nopath_call(
; CHECK-NEXT:    [[DEST:%.*]] = alloca [16 x i8], align 1
; CHECK-NEXT:    [[SRC:%.*]] = alloca [16 x i8], align 1
; CHECK-NEXT:    br i1 [[COND:%.*]], label [[CAPTURES:%.*]], label [[NOCAPTURES:%.*]]
; CHECK:       captures:
; CHECK-NEXT:    call void @accept_ptr(ptr [[DEST]])
; CHECK-NEXT:    ret void
; CHECK:       nocaptures:
; CHECK-NEXT:    call void @accept_ptr(ptr [[DEST]]) #[[ATTR3]]
; CHECK-NEXT:    ret void
;
  %dest = alloca [16 x i8]
  %src = alloca [16 x i8]
  br i1 %cond, label %captures, label %nocaptures

captures:
  call void @accept_ptr(ptr %dest) ; capture
  ret void

nocaptures:
  call void @accept_ptr(ptr %src) nounwind
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 16, i1 false)
  ret void
}

; The %src alloca is 4-byte aligned. The memcpy states align 4 on its
; destination operand only; its source operand carries no alignment
; annotation. %dst is a noalias, writable, dereferenceable(128) argument.
; Expected output: the nounwind call operates on %dst directly and the memcpy
; is removed, i.e. the unannotated source operand does not block the rewrite.
define void @source_alignment(ptr noalias writable dereferenceable(128) %dst) {
; CHECK-LABEL: @source_alignment(
; CHECK-NEXT:    [[SRC:%.*]] = alloca [128 x i8], align 4
; CHECK-NEXT:    call void @accept_ptr(ptr captures(none) [[DST:%.*]]) #[[ATTR3]]
; CHECK-NEXT:    ret void
;
  %src = alloca [128 x i8], align 4
  call void @accept_ptr(ptr nocapture %src) nounwind
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dst, ptr %src, i64 128, i1 false)
  ret void
}

; Negative test: the body is identical to @source_alignment, but %dst is only
; noalias dereferenceable(128) and lacks the writable attribute.
; Expected output: the call still operates on %src and the memcpy into %dst is
; kept unchanged.
define void @dest_not_writable(ptr noalias dereferenceable(128) %dst) {
; CHECK-LABEL: @dest_not_writable(
; CHECK-NEXT:    [[SRC:%.*]] = alloca [128 x i8], align 4
; CHECK-NEXT:    call void @accept_ptr(ptr captures(none) [[SRC]]) #[[ATTR3]]
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DST:%.*]], ptr [[SRC]], i64 128, i1 false)
; CHECK-NEXT:    ret void
;
  %src = alloca [128 x i8], align 4
  call void @accept_ptr(ptr nocapture %src) nounwind
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dst, ptr %src, i64 128, i1 false)
  ret void
}

; External helpers used by the tests above. Both are declared without any
; function or parameter attributes; the tests add attributes (readnone,
; nounwind, argmemonly, willreturn, nocapture) at individual call sites.
declare void @may_throw()
declare void @accept_ptr(ptr)
; Memory intrinsics referenced by the test inputs and the expected output.
declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
declare void @llvm.memset.p0.i64(ptr, i8, i64, i1)
