We’ve got the following test case:
class A {
public:
  int x;
  A(int x) : x(x) {}
  int hoo() { return 4; }
  virtual int foo() { return x; }
  virtual int goo() { return foo() + 10; }
  virtual int operator+(A &a) {
    return x + a.x;
  }
};

class B : public A {
public:
  B(int x) : A(x) {}
  int hoo() { return 2; }
  virtual int foo() { return A::foo() * 2; }
};

int main() {
  A* a = new A(1);
  B* b = new B(2);
  int y = a->foo() + b->goo() + a->hoo() + b->hoo() + (*a + *b);
  delete a; delete b;
  return y;
}
Clang with -O4 -S -emit-llvm emits the following for main():
define i32 @main() {
%1 = tail call noalias i8* @_Znwm(i64 16), !dbg !70
%2 = bitcast i8* %1 to %class.A*, !dbg !70
tail call void @llvm.dbg.value(metadata !{%class.A* %2}, i64 0, metadata !68)
tail call void @llvm.dbg.value(metadata !71, i64 0, metadata !69)
tail call void @llvm.dbg.value(metadata !{%class.A* %2}, i64 0, metadata !66)
tail call void @llvm.dbg.value(metadata !71, i64 0, metadata !67)
%4 = bitcast i8* %1 to i32 (...)***
store i32 (...)** bitcast (i8** getelementptr inbounds ([5 x i8*]* @_ZTV1A, i64 0, i64 2) to i32 (...)**), i32 (...)*** %4, align 8
%5 = getelementptr inbounds i8* %1, i64 8
%6 = bitcast i8* %5 to i32*
store i32 1, i32* %6, align 4, !tbaa !72
tail call void @llvm.dbg.value(metadata !{%class.A* %2}, i64 0, metadata !49), !dbg !70
%7 = tail call noalias i8* @_Znwm(i64 16), !dbg !75
tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !57)
tail call void @llvm.dbg.value(metadata !76, i64 0, metadata !58)
tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !59)
tail call void @llvm.dbg.value(metadata !76, i64 0, metadata !60)
tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !66)
tail call void @llvm.dbg.value(metadata !76, i64 0, metadata !67)
%8 = getelementptr inbounds i8* %7, i64 8
%9 = bitcast i8* %8 to i32*
store i32 2, i32* %9, align 4, !tbaa !72
%10 = bitcast i8* %7 to i8***
store i8** getelementptr inbounds ([5 x i8*]* @_ZTV1B, i64 0, i64 2), i8*** %10, align 8
tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !52), !dbg !75
%11 = bitcast i8* %1 to i32 (%class.A*)***, !dbg !77
%12 = load i32 (%class.A*)*** %11, align 8, !dbg !77
%13 = load i32 (%class.A*)** %12, align 8, !dbg !77
%14 = tail call i32 %13(%class.A* %2), !dbg !77
%15 = bitcast i8* %7 to %class.A*, !dbg !77
%16 = bitcast i8* %7 to i32 (%class.A*)***, !dbg !77
%17 = load i32 (%class.A*)*** %16, align 8, !dbg !77
%18 = getelementptr inbounds i32 (%class.A*)** %17, i64 1, !dbg !77
%19 = load i32 (%class.A*)** %18, align 8, !dbg !77
%20 = tail call i32 %19(%class.A* %15), !dbg !77
%21 = bitcast i8* %1 to i32 (%class.A*, %class.A*)***, !dbg !77
%22 = load i32 (%class.A*, %class.A*)*** %21, align 8, !dbg !77
%23 = getelementptr inbounds i32 (%class.A*, %class.A*)** %22, i64 2, !dbg !77
%24 = load i32 (%class.A*, %class.A*)** %23, align 8, !dbg !77
%25 = tail call i32 %24(%class.A* %2, %class.A* %15), !dbg !77
%26 = add i32 %14, 6, !dbg !77
%27 = add i32 %26, %20, !dbg !77
%28 = add i32 %27, %25, !dbg !77
tail call void @llvm.dbg.value(metadata !{i32 %28}, i64 0, metadata !54), !dbg !77
%29 = icmp eq i8* %1, null, !dbg !78
br i1 %29, label %31, label %30, !dbg !78
; <label>:30                                      ; preds = %0
tail call void @_ZdlPv(i8* %1) nounwind, !dbg !78
br label %31, !dbg !78
; <label>:31                                      ; preds = %30, %0
%32 = icmp eq i8* %7, null, !dbg !78
br i1 %32, label %34, label %33, !dbg !78
; <label>:33                                      ; preds = %31
tail call void @_ZdlPv(i8* %7) nounwind, !dbg !78
br label %34, !dbg !78
; <label>:34                                      ; preds = %33, %31
ret i32 %28, !dbg !79
}
It’s a bit long-winded, but from looking at the source it’s clear that none of the virtual calls is actually necessary: the dynamic types of *a and *b are known at every call site. Yet Clang and LLVM keep all three of them as indirect calls through the vtable, even though the non-virtual hoo() calls were constant-folded into the add of 6. A hand-devirtualized version of the expression is below for comparison.
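For concreteness, here is what the fully devirtualized computation would look like at the source level (our own illustration, not anything the compiler emits); qualified calls dispatch statically, so an analysis that proved the dynamic types could reduce every call to a direct one:

// Hand-devirtualized equivalent of the expression in main(); a qualified
// call such as a->A::foo() never goes through the vtable.
int y = a->A::foo()               // *a has dynamic type A; yields 1
      + (b->B::foo() + 10)        // goo() inlined; its foo() resolves to B::foo(); yields 14
      + a->hoo() + b->hoo()       // non-virtual to begin with; yields 4 + 2
      + a->A::operator+(*b);      // *a has dynamic type A; yields 3
// y == 24, with no indirect calls left.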
In particular, we want to implement the kind of devirtualization analysis described by Sonajalg et al. (http://www.cs.ut.ee/~varmo/seminar/sem09S/final/s6najalg.pdf), but for C++, even if all we can get in most cases is a more conservative approximation. It is essentially a lot of type analysis, and it involves querying properties of types -lower- in the hierarchy than the declared or instantiated type. That does not seem to be an operation Clang or the debug metadata supports directly, so the only option (as we see it) is to build a custom representation of the class hierarchy from data we -do- have access to, along the lines of the sketch below. Well, that, or generate even more metadata.
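As a rough sketch of what we mean (our own code built on a standard Clang AST walk, not an existing Clang facility), one can record, for every class, the classes that directly derive from it, which makes "what lies below T?" an answerable query:

#include "clang/AST/DeclCXX.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include <map>
#include <set>

using namespace clang;

// Maps each class to the classes that directly derive from it; the
// transitive closure gives everything "lower" in the hierarchy.
class HierarchyBuilder : public RecursiveASTVisitor<HierarchyBuilder> {
public:
  std::map<const CXXRecordDecl *, std::set<const CXXRecordDecl *>> Derived;

  bool VisitCXXRecordDecl(CXXRecordDecl *D) {
    if (!D->hasDefinition())
      return true; // forward declarations carry no base-class information
    for (const CXXBaseSpecifier &Base : D->bases())
      if (const CXXRecordDecl *BaseDecl =
              Base.getType()->getAsCXXRecordDecl())
        Derived[BaseDecl->getCanonicalDecl()].insert(D->getCanonicalDecl());
    return true; // continue the traversal
  }
};

With such a map, a query like "does any class below A override foo()?" reduces to walking the derived sets and checking each record for an overriding method. For the test case above, B overrides foo() but nothing overrides goo() or operator+, so those two call sites only need the dynamic type to be pinned down to "A or B".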
It’s also unclear to me how much the proposed assert/assume features would help. I can see them simplifying the job of determining how much precision is needed: in a file-scoped function, for example, we know the arguments can’t come from outside the translation unit, so we could actually determine what the "lowest" possible types of the arguments are (illustrated below). But it’s unclear how this per se helps with obtaining the type information in the first place: the -g flag seems to generate sufficient data, but there is no clear way to access it. It could just be myopia and ignorance on my part, though.
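To spell out the file-scope point (hypothetical example reusing the classes above; call_foo and use are our own names):

// Because call_foo has internal linkage, every call site is visible in
// this translation unit, so an analysis can bound the dynamic type of *p.
static int call_foo(A *p) {
  return p->foo(); // the only caller passes a B*, so this is exactly B::foo()
}

int use() {
  B b(2);
  return call_foo(&b); // sole call site; dynamic type of the argument is B
}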
Thanks for your help,
-Vitor