Compiling LLVM with CSIR PGO

Hey, recently I’ve been trying to build LLVM with all the fancy features provided by CMake options but I’m struggling with 2 topics:

  • How can I make the profiled (instrumented) compiler compile all or part of LLVM instead of the short test program? AdvancedBuilds.rst describes stage2-instrumented-generate-profdata with “If you let that run for a few hours or so [..]”, suggesting that the process takes a considerable amount of time. However, I’ve found that profiling takes just a few seconds by default, and any attempt to point CMake to another CMakeLists.txt via CLANG_PGO_TRAINING_DATA_SOURCE_DIR results in the following error:
    [3295/3403] cd /home/user/src/llvm/build/tools/clang/stage2-instrumented-bins/tools/clang/utils/perf-training && /usr/bin/python3.13 /home/user/src/llvm/build/tools/clang/stage2-instrumented-bins/./bin/llvm-lit -sv /home/user/src/llvm/build/tools/clang/stage2-instrumented-bins/tools/clang/utils/perf-training/pgo-data/
    llvm-lit: /home/user/src/llvm/llvm/utils/lit/lit/TestingConfig.py:156: fatal: unable to parse config file '/home/user/src/llvm/clang/utils/perf-training/lit.cfg', traceback: Traceback (most recent call last):
      File "/home/user/src/llvm/llvm/utils/lit/lit/TestingConfig.py", line 144, in load_from_path
        exec(compile(data, path, "exec"), cfg_globals, None)
        ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File "/home/user/src/llvm/clang/utils/perf-training/lit.cfg", line 25, in <module>
        config.clang = lit.util.which('clang', config.clang_tools_dir).replace('\\', '/')
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    AttributeError: 'NoneType' object has no attribute 'replace'
    
  • With CSIR PGO enabled, I can count 26163 occurrences of warning: /home/user/src/llvm/llvm/lib/Demangle/Demangle.cpp: function control flow change detected (hash mismatch) _ZN4llvm8demangleB5cxx11ESt17basic_string_viewIcSt11char_traitsIcEE Hash = 2249543095032649673 up to 0 count discarded [-Wbackend-plugin] and similar warnings. Considering the profile data passed to -fprofile-instr-use was generated for the exact same source code compiled as part of Ninja’s stage2-instrumented target, I’m not sure what this is all about or how it impacts the quality of profile-guided optimizations. How can I address this issue?

I’m using the following commands to run the build:

# Wipe the previous install tree (/tmp/root) and build tree (build), but keep
# every file that lives under an lto.cache/ directory. The -path pattern is
# quoted so the shell hands it to find verbatim instead of glob-expanding it
# against the current directory first.
find /tmp/root build -not -type d -not -path '*/lto.cache/*' -delete
# Then prune the directories that were left empty by the pass above.
find /tmp/root build -type d -empty -delete

export VERBOSE=1

# Configure from the initial-cache script, then build the stage2 target.
# The steps are chained with && so ninja is not run against a build tree
# whose configuration failed. Output of both steps (stdout and stderr) is
# captured in /tmp/ninja.txt.
{
    cmake -S llvm -B build -G Ninja -C CMakeCache.txt -D CMAKE_INSTALL_PREFIX=/tmp/root &&
    ninja -C build stage2
} > /tmp/ninja.txt 2>&1

Unfortunately, I cannot upload files, so here are my CMakeCache.txt (the initial-cache script passed to CMake via -C above) and the patch for llvmorg-20.1.8 that I used to get LLVM_BUILD_STATIC to work:

# Initial-cache script for a multi-stage Clang bootstrap; load it with
# `cmake -C <this file>`. Every entry is a cache variable with an empty
# docstring.
#
# Variables prefixed with BOOTSTRAP_ are forwarded, with the prefix stripped,
# to the next-stage build (see LLVM's "Advanced Build Configurations"
# documentation). A doubled prefix level therefore targets the stage after.

# ---- Stage 1: general build settings ---------------------------------------
set(CMAKE_BUILD_TYPE Release CACHE STRING "")
set(CMAKE_C_COMPILER_LAUNCHER ccache CACHE STRING "")
set(CMAKE_CXX_COMPILER_LAUNCHER ccache CACHE STRING "")

# ---- Stage 1: LLVM options --------------------------------------------------
# List values are written as explicit ;-separated strings.
set(LLVM_ENABLE_PROJECTS "clang;lld;polly" CACHE STRING "")
set(LLVM_ENABLE_RUNTIMES "libcxx;libcxxabi;libunwind;compiler-rt;openmp" CACHE STRING "")
set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR OFF CACHE BOOL "")
set(LLVM_ENABLE_EH ON CACHE BOOL "")
set(LLVM_ENABLE_RTTI ON CACHE BOOL "")
set(LLVM_ENABLE_LIBXML2 OFF CACHE BOOL "")
set(LLVM_INCLUDE_BENCHMARKS OFF CACHE BOOL "")
set(LLVM_INCLUDE_EXAMPLES OFF CACHE BOOL "")
set(LLVM_INCLUDE_TESTS OFF CACHE BOOL "")
set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
set(LLVM_BUILD_STATIC ON CACHE BOOL "")

# ---- Stage 1: Clang options and bootstrap wiring ----------------------------
set(CLANG_BOOTSTRAP_PASSTHROUGH CMAKE_INSTALL_PREFIX CACHE STRING "")
set(CLANG_BOOTSTRAP_TARGETS "generate-profdata;stage2;stage2-clang;stage2-install" CACHE STRING "")
set(CLANG_DEFAULT_PIE_ON_LINUX OFF CACHE BOOL "")
set(CLANG_ENABLE_BOOTSTRAP ON CACHE BOOL "")
set(CLANG_ENABLE_ARCMT OFF CACHE BOOL "")
set(CLANG_ENABLE_STATIC_ANALYZER OFF CACHE BOOL "")

# ---- Next stage: compiler flags and launchers -------------------------------
# Each flags value is a single quoted string so the whole command-line
# fragment stays one cache value rather than becoming a list.
set(BOOTSTRAP_CMAKE_C_FLAGS "-march=native -mtune=native -mllvm -polly -mllvm -polly-vectorizer=stripmine" CACHE STRING "")
set(BOOTSTRAP_CMAKE_CXX_FLAGS "-march=native -mtune=native -mllvm -polly -mllvm -polly-vectorizer=stripmine" CACHE STRING "")
set(BOOTSTRAP_CMAKE_C_COMPILER_LAUNCHER ccache CACHE STRING "")
set(BOOTSTRAP_CMAKE_CXX_COMPILER_LAUNCHER ccache CACHE STRING "")

# ---- Next stage: LLVM options -----------------------------------------------
set(BOOTSTRAP_LLVM_ENABLE_PER_TARGET_RUNTIME_DIR OFF CACHE BOOL "")
set(BOOTSTRAP_LLVM_ENABLE_LLD ON CACHE BOOL "")
set(BOOTSTRAP_LLVM_ENABLE_LTO Thin CACHE STRING "")
set(BOOTSTRAP_LLVM_ENABLE_EH ON CACHE BOOL "")
set(BOOTSTRAP_LLVM_ENABLE_RTTI ON CACHE BOOL "")
set(BOOTSTRAP_LLVM_ENABLE_LIBXML2 OFF CACHE BOOL "")
set(BOOTSTRAP_LLVM_INCLUDE_BENCHMARKS OFF CACHE BOOL "")
set(BOOTSTRAP_LLVM_INCLUDE_EXAMPLES OFF CACHE BOOL "")
set(BOOTSTRAP_LLVM_INCLUDE_TESTS ON CACHE BOOL "")
set(BOOTSTRAP_LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
# The value is an instrumentation mode name (CSIR), not a boolean, so the
# cache entry is typed STRING rather than BOOL.
set(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED CSIR CACHE STRING "")
set(BOOTSTRAP_LLVM_BUILD_STATIC ON CACHE BOOL "")

# ---- Next stage: Clang options and bootstrap wiring -------------------------
set(BOOTSTRAP_CLANG_BOOTSTRAP_PASSTHROUGH CMAKE_INSTALL_PREFIX CACHE STRING "")
set(BOOTSTRAP_CLANG_BOOTSTRAP_TARGETS clang CACHE STRING "")
set(BOOTSTRAP_CLANG_DEFAULT_PIE_ON_LINUX OFF CACHE BOOL "")
set(BOOTSTRAP_CLANG_ENABLE_BOOTSTRAP ON CACHE BOOL "")
set(BOOTSTRAP_CLANG_ENABLE_ARCMT OFF CACHE BOOL "")
set(BOOTSTRAP_CLANG_ENABLE_STATIC_ANALYZER OFF CACHE BOOL "")
diff --git a/clang/tools/libclang/CMakeLists.txt b/clang/tools/libclang/CMakeLists.txt
index 00a1223c08..d7aea2d83c 100644
--- a/clang/tools/libclang/CMakeLists.txt
+++ b/clang/tools/libclang/CMakeLists.txt
@@ -84,9 +84,6 @@ elseif (CLANG_BUILT_STANDALONE)
   endif ()
 endif ()
 
-option(LIBCLANG_BUILD_STATIC
-  "Build libclang as a static library (in addition to a shared one)" OFF)
-
 set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_BINARY_DIR}/libclang-generic.exports)
 set(LIBCLANG_VERSION_SCRIPT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/libclang.map)
 
@@ -111,11 +108,11 @@ if (LLVM_EXPORTED_SYMBOL_FILE)
                      DEPENDS ${LIBCLANG_VERSION_SCRIPT_FILE})
 endif()
 
-if(LLVM_ENABLE_PIC OR (WIN32 AND NOT LIBCLANG_BUILD_STATIC))
+if(LLVM_ENABLE_PIC OR (WIN32 AND NOT LLVM_BUILD_STATIC))
   set(ENABLE_SHARED SHARED)
 endif()
 
-if(NOT LLVM_ENABLE_PIC OR LIBCLANG_BUILD_STATIC)
+if(NOT LLVM_ENABLE_PIC OR LLVM_BUILD_STATIC)
   set(ENABLE_STATIC STATIC)
 endif()
 
@@ -258,4 +255,3 @@ if(NOT LLVM_ENABLE_IDE)
                            COMPONENT
                              libclang-python-bindings)
 endif()
-
diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake
index d3e9377c8d..e9dc7f88aa 100644
--- a/llvm/cmake/modules/AddLLVM.cmake
+++ b/llvm/cmake/modules/AddLLVM.cmake
@@ -297,24 +297,6 @@ endif()
 
 function(add_link_opts target_name)
   get_llvm_distribution(${target_name} in_distribution in_distribution_var)
-  if(NOT in_distribution)
-    # Don't LTO optimize targets that aren't part of any distribution.
-    if (LLVM_ENABLE_LTO)
-      # We may consider avoiding LTO altogether by using -fembed-bitcode
-      # and teaching the linker to select machine code from .o files, see
-      # https://lists.llvm.org/pipermail/llvm-dev/2021-April/149843.html
-      if((UNIX OR MINGW) AND LINKER_IS_LLD)
-        set_property(TARGET ${target_name} APPEND_STRING PROPERTY
-                      LINK_FLAGS " -Wl,--lto-O0")
-      elseif(LINKER_IS_LLD_LINK)
-        set_property(TARGET ${target_name} APPEND_STRING PROPERTY
-                      LINK_FLAGS " /opt:lldlto=0")
-      elseif(APPLE AND NOT uppercase_LLVM_ENABLE_LTO STREQUAL "THIN")
-        set_property(TARGET ${target_name} APPEND_STRING PROPERTY
-                      LINK_FLAGS " -Wl,-mllvm,-O0")
-      endif()
-    endif()
-  endif()
 
   # Don't use linker optimizations in debug builds since it slows down the
   # linker in a context where the optimizations are not important.

@xur