This is right before GVN:

; i32 @foo(<4 x i16> %p, <4 x i16> %p1, i16* %res)
;
; Uses an 8-byte stack slot (%temp) that is zero-initialised and then passed,
; viewed as <4 x i16>*, to @llvm.XXX.intrinsic together with %p. The four i16
; elements of the slot are read back and their signed minimum is stored to
; %res. The intrinsic is then called a second time with the pointer returned by
; the first call and %p1, the slot is read back again, and the signed minimum
; of that second read is sign-extended to i32 and returned.
;
; Corrections relative to the dump as originally pasted:
;   * removed a stray '*' that followed "!dbg !10" on the %1 bitcast;
;   * the first intrinsic call now has result type <4 x i16>*, matching the
;     second call to the same callee and the use of %2 as a <4 x i16>* operand.
define i32 @foo(<4 x i16> %p, <4 x i16> %p1, i16* nocapture %res) local_unnamed_addr #0 !dbg !6 {
entry:
  ; One 8-byte slot; the casts below are different typed views of the same address.
  %temp = alloca i64, align 8
  %tmpcast = bitcast i64* %temp to [4 x i16]*
  %0 = bitcast i64* %temp to i8*, !dbg !8
  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0) #3, !dbg !8
  ; Zero all four i16 elements with a single i64 store.
  store i64 0, i64* %temp, align 8, !dbg !9
  %1 = bitcast i64* %temp to <4 x i16>*, !dbg !10
  ; First intrinsic call: operands are the slot pointer, %p and constant 0.
  ; The returned pointer (%2) is the pointer operand of the second call.
  %2 = call <4 x i16>* @llvm.XXX.intrinsic(<4 x i16>* nonnull %1, <4 x i16> %p, i32 0), !dbg !11, !tbaa !12
  ; Element 0 of the slot; %arrayidx is reused for the second read-back below.
  %arrayidx = bitcast i64* %temp to i16*, !dbg !16
  %3 = load i16, i16* %arrayidx, align 8, !dbg !16, !tbaa !17
  br label %for.body, !dbg !19

for.body: ; preds = %entry
  ; Straight-line signed-min reduction over elements 1..3, seeded with element 0
  ; (%3): each step is "icmp sgt acc, x ; select cmp, x, acc", i.e. min(acc, x).
  %arrayidx1 = getelementptr inbounds [4 x i16], [4 x i16]* %tmpcast, i32 0, i32 1, !dbg !20
  %4 = load i16, i16* %arrayidx1, align 2, !dbg !20, !tbaa !17
  %cmp3 = icmp sgt i16 %3, %4, !dbg !21
  %spec.select = select i1 %cmp3, i16 %4, i16 %3, !dbg !22
  %arrayidx1.1 = getelementptr inbounds [4 x i16], [4 x i16]* %tmpcast, i32 0, i32 2, !dbg !20
  %5 = load i16, i16* %arrayidx1.1, align 2, !dbg !20, !tbaa !17
  %cmp3.1 = icmp sgt i16 %spec.select, %5, !dbg !21
  %spec.select.1 = select i1 %cmp3.1, i16 %5, i16 %spec.select, !dbg !22
  %arrayidx1.2 = getelementptr inbounds [4 x i16], [4 x i16]* %tmpcast, i32 0, i32 3, !dbg !20
  %6 = load i16, i16* %arrayidx1.2, align 2, !dbg !20, !tbaa !17
  %cmp3.2 = icmp sgt i16 %spec.select.1, %6, !dbg !21
  %spec.select.2 = select i1 %cmp3.2, i16 %6, i16 %spec.select.1, !dbg !22
  ; Publish the first minimum through the caller-provided pointer.
  store i16 %spec.select.2, i16* %res, align 2, !dbg !23, !tbaa !17
  ; Second intrinsic call: pointer operand is %2 from the first call, vector
  ; operand is %p1. Its result (%7) is not used in this function.
  ; NOTE(review): the `tail` marker implies the callee does not access the
  ; caller's allocas; if %2 points into %temp this marker looks suspect - confirm.
  %7 = tail call <4 x i16>* @llvm.XXX.intrinsic(<4 x i16>* %2, <4 x i16> %p1, i32 0), !dbg !24, !tbaa !12
  ; Re-read element 0 after the second call.
  %8 = load i16, i16* %arrayidx, align 8, !dbg !25, !tbaa !17
  br label %for.body12, !dbg !26

for.body12: ; preds = %for.body
  ; Same signed-min reduction as in %for.body, over the values re-read from the
  ; slot after the second intrinsic call, seeded with %8.
  %arrayidx14 = getelementptr inbounds [4 x i16], [4 x i16]* %tmpcast, i32 0, i32 1, !dbg !27
  %9 = load i16, i16* %arrayidx14, align 2, !dbg !27, !tbaa !17
  %cmp16 = icmp sgt i16 %8, %9, !dbg !28
  %spec.select39 = select i1 %cmp16, i16 %9, i16 %8, !dbg !29
  %arrayidx14.1 = getelementptr inbounds [4 x i16], [4 x i16]* %tmpcast, i32 0, i32 2, !dbg !27
  %10 = load i16, i16* %arrayidx14.1, align 2, !dbg !27, !tbaa !17
  %cmp16.1 = icmp sgt i16 %spec.select39, %10, !dbg !28
  %spec.select39.1 = select i1 %cmp16.1, i16 %10, i16 %spec.select39, !dbg !29
  %arrayidx14.2 = getelementptr inbounds [4 x i16], [4 x i16]* %tmpcast, i32 0, i32 3, !dbg !27
  %11 = load i16, i16* %arrayidx14.2, align 2, !dbg !27, !tbaa !17
  %cmp16.2 = icmp sgt i16 %spec.select39.1, %11, !dbg !28
  %spec.select39.2 = select i1 %cmp16.2, i16 %11, i16 %spec.select39.1, !dbg !29
  ; Widen the second minimum to the i32 return type, preserving sign.
  %conv24 = sext i16 %spec.select39.2 to i32, !dbg !30
  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0) #3, !dbg !31
  ret i32 %conv24, !dbg !32