Segmentation fault when using @llvm.global_dtors on Windows

; ModuleID = 'main.ll'
source_filename = "main.ll"
target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-windows-msvc19.37.32825"

; NUL-terminated string "D\n" that @cleanup passes to printf.
@.StringLiteral0 = private unnamed_addr constant [3 x i8] c"D\0A\00", align 1

; Destructor table with a single { priority, function, associated data } entry:
; it registers @cleanup with priority 0 and no associated data (null).
@llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 0, ptr @cleanup, ptr null }]

; External declaration of the variadic printf; no definition in this module.
declare i32 @printf(ptr, ...)

; Destructor function referenced from @llvm.global_dtors.
; Prints the string "D\n" via printf and returns.
define void @cleanup() {
entry:
  ; The i32 result of printf is ignored.
  call i32 (ptr, ...) @printf(ptr @.StringLiteral0)
  ret void
}

; Program entry point: performs no work and returns 0.
; @cleanup is not called directly here (the call below is commented out);
; it is referenced only through @llvm.global_dtors.
define i32 @main() {
entry:
  ; call void @cleanup()
  ret i32 0
}

This is a minimal reproducible example of code that is supposed to call the destructors of global objects on program termination. Since there might be more than one source file to be linked together, each file has a cleanup function in which the destructors of all its global variables are called. The cleanup functions from all files are appended to the @llvm.global_dtors array and are supposed to be called at the end of the program.

This is not what happens, however. This code results in a segmentation fault.

When I compile it and run it in lldb, I get the following backtrace:

(lldb) Process 13320 stopped
* thread #1, stop reason = Exception 0xc0000005 encountered at address 0x7ff860fa3aca: Access violation writing location 0x00000024
    frame #0: 0x00007ff860fa3aca ntdll.dll`TpWorkOnBehalfClearTicket + 394
ntdll.dll`TpWorkOnBehalfClearTicket:
->  0x7ff860fa3aca <+394>: incl   0x24(%rax)
    0x7ff860fa3acd <+397>: movq   0x18(%rdi), %r12
    0x7ff860fa3ad1 <+401>: leaq   0x10(%rdi), %rsi
    0x7ff860fa3ad5 <+405>: movq   %r12, -0x19(%rbp)
(lldb) bt
error: a.exe :: Class '__crt_cached_ptd_host::cached<int>::guard' has a member '_copy' of type '__crt_cached_ptd_host::cached<int>' which does not have a complete definition.
(lldb) * thread #1, stop reason = Exception 0xc0000005 encountered at address 0x7ff860fa3aca: Access violation writing location 0x00000024
  * frame #0: 0x00007ff860fa3aca ntdll.dll`TpWorkOnBehalfClearTicket + 394
    frame #1: 0x00007ff860f918e4 ntdll.dll`RtlEnterCriticalSection + 596
    frame #2: 0x00007ff860f916d2 ntdll.dll`RtlEnterCriticalSection + 66
    frame #3: 0x00007ff6fd3a5761 a.exe`static int __crt_seh_guarded_call<int>::operator(this=0x00007ff6fd4150c8, setup=<unavailable>, setup=0x00000217c1b1aa60, action=0x00000084294ff5c0, action=<unavailable>, action=0x002d0064006c0069, cleanup=0x00000084294ff5b0, cleanup=<unavailable>, cleanup=0x00000084294ff5b0)<<lambda_d854c62834386a3b23916ad6dae2782d>,<lambda_303760bc4008a2b3ec4768a30b06a80c> &,<lambda_4780a7ea4f8cbd2590aec34bd14e2bbf> >(class __acrt_lock_stream_and_call::__l2::<lambda_d854c62834386a3b23916ad6dae2782d> *, class common_vfprintf::__l2::<lambda_303760bc4008a2b3ec4768a30b06a80c> & const, class __acrt_lock_stream_and_call::__l2::<lambda_4780a7ea4f8cbd2590aec34bd14e2bbf> *) at internal_shared.h:201
    frame #4: 0x00007ff6fd3cf41a a.exe`__stdio_common_vfprintf(options=<unavailable>, options=<unavailable>, stream=<unavailable>, stream=<unavailable>, format=<unavailable>, format=<unavailable>, locale=<unavailable>, locale=<unavailable>, arglist="", arglist=" \xff?\xfd\xf6\U0000007f") at output.cpp:61
    frame #5: 0x00007ff6fd39e789 a.exe`printf(_Format=<unavailable>, _Format=<unavailable>, _Format="D\n") at stdio.h:960
    frame #6: 0x00007ff6fd387240 a.exe`cleanup + 16
    frame #7: 0x00007ff6fd3a0abd a.exe`_initterm(first=<unavailable>, first=<unavailable>, last=<unavailable>, last=0x00007ff6fd3fff20, last=<unavailable>) at initterm.cpp:21
    frame #8: 0x00007ff6fd3a0d2c a.exe`static void <lambda_6e4b09c48022b2350581041d5f6b0c4c>::operator(this=0x0000000000000fa0)() at exit.cpp:242
    frame #9: 0x00007ff6fd3a0b6d a.exe`static void __crt_seh_guarded_call<void>::operator(this=0x0000000000000000, setup=<unavailable>, setup=0x0000000000000000, action=0x00000084294ff768, action=<unavailable>, action=0x02100800000906ea, cleanup=0x00000084294ff754, cleanup=<unavailable>, cleanup=0x00000084294ff754)<<lambda_d80eeec6fff315bfe5c115232f3240e3>,<lambda_6e4b09c48022b2350581041d5f6b0c4c> &,<lambda_2358e3775559c9db80273638284d5e45> >(class __acrt_lock_and_call::__l2::<lambda_d80eeec6fff315bfe5c115232f3240e3> *, class common_exit::__l2::<lambda_6e4b09c48022b2350581041d5f6b0c4c> & const, class __acrt_lock_and_call::__l2::<lambda_2358e3775559c9db80273638284d5e45> *) at internal_shared.h:224
    frame #10: 0x00007ff6fd3a0ea3 a.exe`static void common_exit(return_code=<unavailable>, return_code=693105488, return_code=693105488, cleanup_mode=<unavailable>, cleanup_mode=<unavailable>, cleanup_mode=_crt_exit_full_cleanup, return_mode=<unavailable>, return_mode=<unavailable>, return_mode=_crt_exit_terminate_process) at exit.cpp:195
    frame #11: 0x00007ff6fd389d93 a.exe`static int __scrt_common_main_seh() at exe_common.inl:295
    frame #12: 0x00007ff85f1b257d kernel32.dll`BaseThreadInitThunk + 29
    frame #13: 0x00007ff860fcaa58 ntdll.dll`RtlUserThreadStart + 40

When I comment this line:

@llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 0, ptr @cleanup, ptr null }]

and uncomment this line:

; call void @cleanup()

or in other words, when I run the cleanup function manually at the end of the main function and get rid of the @llvm.global_dtors array, the program works as intended.

This error happens only on Windows. The same code works as intended on Linux.

According to the backtrace, %rax has an unexpected value of 0 at that point. Considering that TpWorkOnBehalfClearTicket is a kernel function, this should not be a compiler miscompilation.

I don’t think it’s LLVM’s issue either. The IR is simple and cleanup was called successfully.

I have two guesses (note: I’m not an expert in operating systems or front ends):

  • A Windows OS issue. The cleanup function was called during process exit, from common_exit. I also noticed that __crt_seh_guarded_call was entered twice. The second call comes from printf and, importantly, it calls RtlEnterCriticalSection.
    I noticed one article mentioned some weird behavior when calling EnterCriticalSection during process termination, which seems related to this one.
  • Something went wrong during IR generation. The LangRef describes the purpose of the third field, which makes me wonder whether it shouldn’t be null here.

Anyway, I cannot get any more clues from the current information. I think it would be better if you could provide a simple C file so that others can investigate further.

1 Like

This is a simplified version of the IR code that a compiler I’m working on produces. It calls the destructors of all global objects in the cleanup function. The multiple versions of the cleanup function (each with a unique suffix) are all appended to the @llvm.global_dtors array and called one after the other.

It seems ok to call the cleanup function at the end of the main function. I’m wondering whether there is a way to create my own appending array, iterate over its elements, and call them one by one.

I tried modifying the IR code so that the name of the @llvm.global_dtors is different, but I always ended up with an error that looks like this:

fatal error: error in backend: unknown special variable
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace, preprocessed source, and associated run script.
Stack dump:
0.      Program arguments: "C:\\Program Files\\LLVM\\bin\\clang.exe" -cc1 -triple x86_64-pc-windows-msvc19.37.32825 -emit-obj -mrelax-all -mincremental-linker-compatible -dumpdir a- -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name dest.ll -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -gno-column-info -gcodeview -debug-info-kind=constructor -fcoverage-compilation-dir=E:\\Dua\\cmake-build-debug -resource-dir "C:\\Program Files\\LLVM\\lib\\clang\\17" -fdebug-compilation-dir=E:\\Dua\\cmake-build-debug -ferror-limit 19 -fmessage-length=209 -fno-use-cxa-atexit -fms-extensions -fms-compatibility -fms-compatibility-version=19.37.32825 -fdelayed-template-parsing -fcolor-diagnostics -faddrsig -o C:\\Users\\Osama\\AppData\\Local\\Temp\\dest-848a1f.o -x ir dest.ll
1.      Code generation
clang: error: clang frontend command failed with exit code 70 (use -v to see invocation)
clang version 17.0.2
Target: x86_64-pc-windows-msvc
Thread model: posix
InstalledDir: C:\Program Files\LLVM\bin
clang: note: diagnostic msg: Error generating preprocessed source(s) - no preprocessable inputs.

This suggests that making my own appending global array is not permitted.

I wonder if there is any workaround, such as iterating through all elements of the @llvm.global_dtors array manually at the end of the main function, then replacing each function pointer with an address of an empty function or something similar. In this case, how will I get the length of the @llvm.global_dtors array? I’ve read some comments that suggest that it’s null-terminated.

I think your IR is correct, you are using @llvm.global_dtors correctly as intended.

I think the CRT is crashing because stack unwinding has failed somehow, probably because it couldn’t unwind past the cleanup stack frame, but I’m not sure what to do next. My next idea was to apply the uwtable attribute to cleanup, but it seems that’s unnecessary.

Another thing to note is that printf is defined inline in stdio.h these days. Does the problem go away if you call puts instead of printf?

1 Like

Calling puts doesn’t make a difference. I get the same backtrace as calling printf:

Process 23728 stopped
* thread #1, stop reason = Exception 0xc0000005 encountered at address 0x7ff860fa3aca: Access violation writing location 0x00000024
    frame #0: 0x00007ff860fa3aca ntdll.dll`TpWorkOnBehalfClearTicket + 394
ntdll.dll`TpWorkOnBehalfClearTicket:
->  0x7ff860fa3aca <+394>: incl   0x24(%rax)
    0x7ff860fa3acd <+397>: movq   0x18(%rdi), %r12
    0x7ff860fa3ad1 <+401>: leaq   0x10(%rdi), %rsi
    0x7ff860fa3ad5 <+405>: movq   %r12, -0x19(%rbp)
(lldb) bt
error: crash.exe :: Class '__crt_cached_ptd_host::cached<int>::guard' has a member '_copy' of type '__crt_cached_ptd_host::cached<int>' which does not have a complete definition.
(lldb) * thread #1, stop reason = Exception 0xc0000005 encountered at address 0x7ff860fa3aca: Access violation writing location 0x00000024
  * frame #0: 0x00007ff860fa3aca ntdll.dll`TpWorkOnBehalfClearTicket + 394
    frame #1: 0x00007ff860f918e4 ntdll.dll`RtlEnterCriticalSection + 596
    frame #2: 0x00007ff860f916d2 ntdll.dll`RtlEnterCriticalSection + 66
    frame #3: 0x00007ff7abf8e419 crash.exe`static int __crt_seh_guarded_call<int>::operator(this=0x0000000000000001, setup=<unavailable>, setup=0x00007ff860faab11, action=0x000000d3fd0ff7c0, action=<unavailable>, action=0x0000026455d3ba70, cleanup=0x000000d3fd0ff7b0, cleanup=<unavailable>, cleanup=0x000000d3fd0ff7b0)<<lambda_7ea02354adac61b93e3a5c2c4100d85b>,<lambda_8abd18a77b33e1dd4bdebee267cfa707> &,<lambda_eba452c8b8afd9ffb9102a507ee4e553> >(class __acrt_lock_stream_and_call::__l2::<lambda_7ea02354adac61b93e3a5c2c4100d85b> *, class _puts_internal::__l2::<lambda_8abd18a77b33e1dd4bdebee267cfa707> & const, class __acrt_lock_stream_and_call::__l2::<lambda_eba452c8b8afd9ffb9102a507ee4e553> *) at internal_shared.h:201
    frame #4: 0x00007ff7abf8ebc6 crash.exe`puts(string=<unavailable>, string=<unavailable>, string=<unavailable>) at puts.cpp:44
    frame #5: 0x00007ff7abf77170 crash.exe`cleanup + 16
    frame #6: 0x00007ff7abf90f91 crash.exe`_initterm(first=<unavailable>, first=<unavailable>, last=<unavailable>, last=0x00007ff7abff0f20, last=<unavailable>) at initterm.cpp:21
    frame #7: 0x00007ff7abf911fc crash.exe`static void <lambda_6e4b09c48022b2350581041d5f6b0c4c>::operator(this=0x0000000000000fa0)() at exit.cpp:242
    frame #8: 0x00007ff7abf91041 crash.exe`static void __crt_seh_guarded_call<void>::operator(this=0x0000000000000000, setup=<unavailable>, setup=0x0000000000000000, action=0x000000d3fd0ff958, action=<unavailable>, action=0x0a100800000906ea, cleanup=0x000000d3fd0ff944, cleanup=<unavailable>, cleanup=0x000000d3fd0ff944)<<lambda_d80eeec6fff315bfe5c115232f3240e3>,<lambda_6e4b09c48022b2350581041d5f6b0c4c> &,<lambda_2358e3775559c9db80273638284d5e45> >(class __acrt_lock_and_call::__l2::<lambda_d80eeec6fff315bfe5c115232f3240e3> *, class common_exit::__l2::<lambda_6e4b09c48022b2350581041d5f6b0c4c> & const, class __acrt_lock_and_call::__l2::<lambda_2358e3775559c9db80273638284d5e45> *) at internal_shared.h:224
    frame #9: 0x00007ff7abf9136f crash.exe`static void common_exit(return_code=<unavailable>, return_code=-49285744, return_code=-49285744, cleanup_mode=<unavailable>, cleanup_mode=<unavailable>, cleanup_mode=_crt_exit_full_cleanup, return_mode=<unavailable>, return_mode=<unavailable>, return_mode=_crt_exit_terminate_process) at exit.cpp:195
    frame #10: 0x00007ff7abf79c93 crash.exe`static int __scrt_common_main_seh() at exe_common.inl:295
    frame #11: 0x00007ff85f1b257d kernel32.dll`BaseThreadInitThunk + 29
    frame #12: 0x00007ff860fcaa58 ntdll.dll`RtlUserThreadStart + 40

Also, applying the uwtable attribute didn’t make a difference

It occurs to me that clang doesn’t actually use llvm.global_dtors, so this is pretty untested functionality. Maybe we got the finalizer prototypes completely wrong, and maybe that’s causing the issue.

As a workaround, you should be able to replace this with an initializer which registers a finalizer with atexit instead. Otherwise, I think you may have found a real bug in the llvm.global_dtors implementation that’s worth filing.

1 Like

You can get clang to use llvm.global_dtors if you use the destructor attribute directly (see the linked Compiler Explorer example). My guess is that there’s something weird going on in the lowering.

The example is for Linux but the issue is Windows specific.