LLVM v6.0 Internalize and GlobalDCE PASS can not work together?

Hi LLVM developers,

$ cat hello.c
#include <stdio.h>

void foo() {
}

int main(int argc, char *argv) {
for (int i = 0; i < 10; i++) {
printf("%d\n", i);
}
return 0;
}

$ /opt/llvm-svn/bin/clang --version
Fedora clang version 6.0.0 (trunk 316308) (based on LLVM 6.0.0svn)
Target: x86_64-redhat-linux
Thread model: posix
InstalledDir: /opt/llvm-svn/bin

$ /opt/llvm-svn/bin/clang -Xclang -disable-O0-optnone -S -emit-llvm hello.c -o hello2.ll

$ cat hello2.ll
; ModuleID = 'hello.c'
source_filename = "hello.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1

; Function Attrs: noinline nounwind uwtable
define void @foo() #0 {
entry:
ret void
}

; Function Attrs: noinline nounwind uwtable
define i32 @main(i32 %argc, i8** %argv) #0 {
entry:
%retval = alloca i32, align 4
%argc.addr = alloca i32, align 4
%argv.addr = alloca i8**, align 8
%i = alloca i32, align 4
store i32 0, i32* %retval, align 4
store i32 %argc, i32* %argc.addr, align 4
store i8** %argv, i8*** %argv.addr, align 8
store i32 0, i32* %i, align 4
br label %for.cond

for.cond: ; preds = %for.inc, %entry
%0 = load i32, i32* %i, align 4
%cmp = icmp slt i32 %0, 10
br i1 %cmp, label %for.body, label %for.end

for.body: ; preds = %for.cond
%1 = load i32, i32* %i, align 4
%call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %1)
br label %for.inc

for.inc: ; preds = %for.body
%2 = load i32, i32* %i, align 4
%inc = add nsw i32 %2, 1
store i32 %inc, i32* %i, align 4
br label %for.cond

for.end: ; preds = %for.cond
ret i32 0
}

declare i32 @printf(i8*, ...) #1

attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 6.0.0 (git@github.com:llvm-mirror/clang.git 0aed123216ad4a38a9c2b16f1783895fd5cb1a04) (git@github.com:llvm-mirror/llvm.git d209b37aec1e392dabbf9b5324ea4a60c36fbc55)"}

$ /opt/llvm-svn/bin/opt -S -internalize -globaldce hello2.ll -o hello2.dce.ll

$ cat hello2.dce.ll
; ModuleID = 'hello2.ll'
source_filename = "hello.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 6.0.0 (git@github.com:llvm-mirror/clang.git 0aed123216ad4a38a9c2b16f1783895fd5cb1a04) (git@github.com:llvm-mirror/llvm.git d209b37aec1e392dabbf9b5324ea4a60c36fbc55)"}

$ /opt/llvm-svn/bin/lli hello2.dce.ll
'main' function not found in module.

*No* more Alive Functions, so LLVM v6.0 Internalize and GlobalDCE PASS failed to MarkLive Function together?

But LLVM 3.1 is able to work together:

$ ./build/Release+Asserts/bin/opt --version
LLVM (http://llvm.org/):
LLVM version 3.1
Optimized build with assertions.
Built Oct 23 2017 (16:22:51).
Default target: x86_64-unknown-linux-gnu
Host CPU: corei7-avx

$ ./build/Release+Asserts/bin/clang -S -emit-llvm hello.c -o hello0.ll

$ cat hello0.ll
; ModuleID = 'hello.c'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1

define void @foo() nounwind uwtable {
entry:
ret void
}

define i32 @main(i32 %argc, i8** %argv) nounwind uwtable {
entry:
%retval = alloca i32, align 4
%argc.addr = alloca i32, align 4
%argv.addr = alloca i8**, align 8
%i = alloca i32, align 4
store i32 0, i32* %retval
store i32 %argc, i32* %argc.addr, align 4
store i8** %argv, i8*** %argv.addr, align 8
store i32 0, i32* %i, align 4
br label %for.cond

for.cond: ; preds = %for.inc, %entry
%0 = load i32* %i, align 4
%cmp = icmp slt i32 %0, 10
br i1 %cmp, label %for.body, label %for.end

for.body: ; preds = %for.cond
%1 = load i32* %i, align 4
%call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %1)
br label %for.inc

for.inc: ; preds = %for.body
%2 = load i32* %i, align 4
%inc = add nsw i32 %2, 1
store i32 %inc, i32* %i, align 4
br label %for.cond

for.end: ; preds = %for.cond
ret i32 0
}

declare i32 @printf(i8*, ...)

$ ./build/Release+Asserts/bin/opt -S -internalize -globaldce hello0.ll -o hello0.dce.ll

$ cat hello0.dce.ll
; ModuleID = 'hello0.ll'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1

define i32 @main(i32 %argc, i8** %argv) nounwind uwtable {
entry:
%retval = alloca i32, align 4
%argc.addr = alloca i32, align 4
%argv.addr = alloca i8**, align 8
%i = alloca i32, align 4
store i32 0, i32* %retval
store i32 %argc, i32* %argc.addr, align 4
store i8** %argv, i8*** %argv.addr, align 8
store i32 0, i32* %i, align 4
br label %for.cond

for.cond: ; preds = %for.inc, %entry
%0 = load i32* %i, align 4
%cmp = icmp slt i32 %0, 10
br i1 %cmp, label %for.body, label %for.end

for.body: ; preds = %for.cond
%1 = load i32* %i, align 4
%call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %1)
br label %for.inc

for.inc: ; preds = %for.body
%2 = load i32* %i, align 4
%inc = add nsw i32 %2, 1
store i32 %inc, i32* %i, align 4
br label %for.cond

for.end: ; preds = %for.cond
ret i32 0
}

declare i32 @printf(i8*, ...)

Succeeded drop DeadFunction foo.

Maybe I wrongly use the Internalize and GlobalDCE PASS together? please give me some hints, thanks a lot!

PS: LLVM 6.0 Internalize PASS is able to work, internal foo:

$ /opt/llvm-svn/bin/opt -S -internalize hello2.ll -o hello2.internal.ll

$ cat hello2.internal.ll
; ModuleID = 'hello2.ll'
source_filename = "hello.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1

; Function Attrs: noinline nounwind uwtable
define internal void @foo() #0 {
entry:
ret void
}

; Function Attrs: noinline nounwind uwtable
define internal i32 @main(i32 %argc, i8** %argv) #0 {
entry:
%retval = alloca i32, align 4
%argc.addr = alloca i32, align 4
%argv.addr = alloca i8**, align 8
%i = alloca i32, align 4
store i32 0, i32* %retval, align 4
store i32 %argc, i32* %argc.addr, align 4
store i8** %argv, i8*** %argv.addr, align 8
store i32 0, i32* %i, align 4
br label %for.cond

for.cond: ; preds = %for.inc, %entry
%0 = load i32, i32* %i, align 4
%cmp = icmp slt i32 %0, 10
br i1 %cmp, label %for.body, label %for.end

for.body: ; preds = %for.cond
%1 = load i32, i32* %i, align 4
%call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %1)
br label %for.inc

for.inc: ; preds = %for.body
%2 = load i32, i32* %i, align 4
%inc = add nsw i32 %2, 1
store i32 %inc, i32* %i, align 4
br label %for.cond

for.end: ; preds = %for.cond
ret i32 0
}

declare i32 @printf(i8*, ...) #1

attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 6.0.0 (git@github.com:llvm-mirror/clang.git 0aed123216ad4a38a9c2b16f1783895fd5cb1a04) (git@github.com:llvm-mirror/llvm.git d209b37aec1e392dabbf9b5324ea4a60c36fbc55)"}

Workaround patch not the root cause:

diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index 26db146..a12a8d5 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -115,6 +115,9 @@ bool InternalizePass::shouldPreserveGV(const GlobalValue &GV) {

bool InternalizePass::maybeInternalize(
GlobalValue &GV, const std::set<const Comdat *> &ExternalComdats) {
+ if (GV.getName() == "main")
+ return false;

Hi, Leslie,

When you use internalize, you need to provide it with a list of symbols to preserve as external (otherwise everything will be internalized, including main, and then DCE will remove everything). You can use -internalize-public-api-list=main (a comma-separated list) or -internalize-public-api-file=some_file_name where some_file_name has the list of symbols. See the comments in lib/Transforms/IPO/Internalize.cpp.

  -Hal

Hi Hal,

Thanks for your hint!

$ /opt/llvm-svn/bin/opt -S -internalize -internalize-public-api-list=main -globaldce hello3.ll -o hello3.dce.ll it works :slight_smile:

But I argue that `main` Function should be inserted into ExternalNames by default:

Index: lib/Transforms/IPO/Internalize.cpp

Hi Hal,

Thanks for your hint!

$ /opt/llvm-svn/bin/opt -S -internalize -internalize-public-api-list=main -globaldce hello3.ll -o hello3.dce.ll it works :slight_smile:

But I argue that `main` Function should be inserted into ExternalNames by default:

But "main" is neither necessary nor sufficient depending on the target/configuration. Internalize is really intended to be used by a linker plugin (or in some other context where a callback can be provided that has the right information). Using the pass as you're doing is really a development aid - it's not a user-facing tool by itself.

  -Hal

Hi Hal,

Thanks for your hint!

$ /opt/llvm-svn/bin/opt -S -internalize -internalize-public-api-list=main -globaldce hello3.ll -o hello3.dce.ll it works :slight_smile:

But I argue that `main` Function should be inserted into ExternalNames by default:

But "main" is neither necessary nor sufficient depending on the target/configuration. Internalize is really intended to be used by a linker plugin (or in some other context where a callback can be provided that has the right information). Using the pass as you're doing is really a development aid - it's not a user-facing tool by itself.

Thanks for your teaching! yes, I just use it to remove DeadFunctions for ScaffCC https://github.com/ScaffCC/ScaffCC/blob/master/scaffold/Scaffold.makefile#L122

Ali JavadiAbhari, Shruti Patil, Daniel Kudrow, Jeff Heckey, Alexey Lvov, Frederic Chong and Margaret Martonosi, ScaffCC: A Framework for Compilation and Analysis of Quantum Computing Programs, ACM International Conference on Computing Frontiers (CF 2014), Cagliari, Italy, May 2014