Get the delayed-load function binding correctly written into the image executable (dlltool)

Dear All,

I’ve been studying the delayed-load (delayimp) pipeline as a possible backend for the missing RPATH functionality on Windows, using the following example:

#include <stdio.h>

int __declspec(dllimport) foo(int arg);

/*
 * Test driver for the delay-loaded import: calls foo() twice (the inner call
 * receives argc, the outer call receives the inner result) and prints the
 * final value. Always returns 0.
 */
int main(int argc, char* argv[])
{
    (void)argv; /* unused */

    printf("foo() = %d\n", foo(foo(argc)));
    return 0;
}

Both GNU and LLVM implement delayed loading similarly, via dlltool (although LLVM’s dlltool functionality seems to have been merged into lld-link). Essentially, the task performed in LLVM’s lld/COFF/DLL.cpp or Binutils’ dlltool.c is two-fold:

  1. Generate a jump table stub for each delayed-load function (see example below)
  2. Generate a trampoline that calls the __delayLoadHelper2 code (see example below)

Upon the successful binding, the __delayLoadHelper2 seems to write a resolved function address right into the executable code section:

extern “C”
FARPROC WINAPI
__delayLoadHelper2(
PCImgDelayDescr pidd,
FARPROC * ppfnIATEntry
) {

SetEntryHookBypass:
*ppfnIATEntry = pfnRet; // access violation

}

To allow modification of the executable image, Microsoft has developed some fancy functions that temporarily add write permissions to the corresponding memory region.

Now the question is: the code to be modified is within the jump table stub that goes into “.idata” section, and it fails to get write permissions:

if ((Characteristics & IMAGE_SCN_MEM_WRITE) == 0) {

//
// This delay load helper module does not support merging the delay
// load section to a read only section because memory management
// would not guarantee that there is commit available - and thus a
// low memory failure path where the delay load failure hook could
// not be safely invoked (the delay load section would still be
// read only) might be encountered.
//
// It is a build time configuration problem to produce such a
// binary so abort here and now so that the problem can be
// identified & fixed.
//

/* Exception thrown at 0x000000013F3B3F3F in dlltool_test_executable.exe: 0xC0000005: Access violation reading */
__fastfail(FAST_FAIL_DLOAD_PROTECTION_FAILURE);
}

So, currently the hard-binding does not work, and gives “write access violation”. I’m wondering what kind of “build-time configuration” am I missing here?

My test config: LLVM upstream from github, BinUtils upstream from git, MSVC2019, Windows 7.

I’m posting this also to StackOverflow: https://stackoverflow.com/questions/63559263/

Kind regards,

  • Dmitry.

$ cat trampoline.s

# Import trampoline

# Trampoline for this import library: saves four registers, calls
# __delayLoadHelper2 with the delay-import descriptor and the value received
# in %rax, restores the registers and jumps to the address the helper returned.
# NOTE(review): %rcx/%rdx/%r8/%r9 are presumably saved because they carry the
# original caller's integer arguments under the Win64 calling convention.
.section .text
.global __tailMerge_C__Users_marcusmae_dlltool_build_import_test_lib
__tailMerge_C__Users_marcusmae_dlltool_build_import_test_lib:
# Preserve the registers that are clobbered below or by the helper call.
pushq %rcx
pushq %rdx
pushq %r8
pushq %r9
# Reserve 40 bytes of stack for the call
# (presumably 32 bytes of shadow space plus 8 for alignment - confirm).
subq $40, %rsp
# Second helper argument: the value the jumping stub left in %rax
# (presumably the address of the IAT entry, i.e. ppfnIATEntry).
movq %rax, %rdx
# First helper argument: address of the delay-import descriptor (pidd).
leaq __DELAY_IMPORT_DESCRIPTOR_C__Users_marcusmae_dlltool_build_import_test_lib(%rip), %rcx
call __delayLoadHelper2
# Release the reserved stack and restore the saved registers in reverse order.
addq $40, %rsp
popq %r9
popq %r8
popq %rdx
popq %rcx
# Tail-jump to the address returned by the helper in %rax.
jmp *%rax

# DELAY_IMPORT_DESCRIPTOR

# Delay-import descriptor record: eight 32-bit fields, four of which are
# image-relative addresses (.rva) of the DLL name, module handle slot, IAT
# and INT labels. Its address is what the trampoline loads into %rcx before
# calling __delayLoadHelper2.
# NOTE(review): the record is emitted into a .text subsection (.text$2),
# not into a data section.
# NOTE(review): grAttrs = 1 presumably marks the address fields as RVAs - confirm.
.section .text$2
.global __DELAY_IMPORT_DESCRIPTOR_C__Users_marcusmae_dlltool_build_import_test_lib
__DELAY_IMPORT_DESCRIPTOR_C__Users_marcusmae_dlltool_build_import_test_lib:
.long 1 # grAttrs
.rva __C__Users_marcusmae_dlltool_build_import_test_lib_iname # rvaDLLName
.rva __DLL_HANDLE_C__Users_marcusmae_dlltool_build_import_test_lib # rvaHmod
.rva __IAT_C__Users_marcusmae_dlltool_build_import_test_lib # rvaIAT
.rva __INT_C__Users_marcusmae_dlltool_build_import_test_lib # rvaINT
.long 0 # rvaBoundIAT
.long 0 # rvaUnloadIAT
.long 0 # dwTimeStamp

# Storage and anchor labels referenced by the delay-import descriptor.
# Eight zero-initialized bytes in .data hold the module handle (rvaHmod).
.section .data
__DLL_HANDLE_C__Users_marcusmae_dlltool_build_import_test_lib:
.long 0 # Handle
.long 0

#Stuff for compatibility
# Eight zero bytes are emitted into .idata$5, then the __IAT label (rvaIAT)
# is defined directly after them.
# NOTE(review): presumably the per-function import objects contribute their
# IAT/INT entries after these labels at link time - verify.
.section .idata$5
.long 0
.long 0
__IAT_C__Users_marcusmae_dlltool_build_import_test_lib:
# Same pattern for .idata$4: eight zero bytes, then the __INT label (rvaINT).
.section .idata$4
.long 0
.long 0
.section .idata$4
__INT_C__Users_marcusmae_dlltool_build_import_test_lib:
# Switches to .idata$2; nothing is emitted into it in this listing.
.section .idata$2

$ objdump -d dorks00000.o

dorks00000.o: file format pe-x86-64

Disassembly of section .text:

0000000000000000 <foo>:
0: ff 25 00 00 00 00 jmpq *0x0(%rip) # 6 <foo+0x6>
6: 48 8d 05 00 00 00 00 lea 0x0(%rip),%rax # d <foo+0xd>
d: e9 00 00 00 00 jmpq 12 <foo+0x12>

Solved! Just for the record: everything is done right here, and Binutils’ dlltool as of today is interoperable with Visual Studio 2019. The only missing bit was that, in the absence of MSVC-orchestrated delayed loading, link.exe defaults to a read-only “.idata” section, which has to be changed with e.g.

# Mark the .idata section read/write so the delay-load helper can store the
# resolved function addresses into it at run time.
set_target_properties(${PROJECT_NAME}_test_executable PROPERTIES LINK_FLAGS "/SECTION:.idata,RW")

Note that the delayimp code for DloadObtainSection() must also be changed slightly, as we don’t have (and don’t really need) the IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT data directory entry:

//
// Locates the ".idata" section of the current image by name.
//
// Walks the section headers of the image described by __ImageBase and, for
// the first section whose name is exactly ".idata", reports its virtual size
// and characteristics.
//
// Arguments:
//   SectionSize            - receives Misc.VirtualSize of the matched section.
//   SectionCharacteristics - receives Characteristics of the matched section.
//
// Return value:
//   Address of the section (image base + VirtualAddress), or NULL when no
//   section named ".idata" exists. The output arguments are written only on
//   success.
//
_Success_(return != nullptr)
DLOAD_INLINE
PVOID
DloadObtainSection (
    _Out_ PULONG SectionSize,
    _Out_ PULONG SectionCharacteristics
    )
{
    PUCHAR ImageBase;
    ULONG Index;
    PIMAGE_NT_HEADERS NtHeaders;
    PIMAGE_SECTION_HEADER SectionHeader;

    ImageBase = (PUCHAR)&__ImageBase;
    NtHeaders = (PIMAGE_NT_HEADERS)(ImageBase + __ImageBase.e_lfanew);

    SectionHeader = IMAGE_FIRST_SECTION(NtHeaders);
    for (Index = 0;
         Index < NtHeaders->FileHeader.NumberOfSections;
         Index += 1, SectionHeader += 1) {

        //
        // Section names are 8-byte, NUL-padded fields. Comparing
        // sizeof(".idata") bytes includes the terminating NUL, so only the
        // exact name matches (a 6-byte prefix compare would also accept a
        // name such as ".idataX").
        //

        if (!__memcmp(SectionHeader->Name, ".idata", sizeof(".idata"))) {
            *SectionSize = SectionHeader->Misc.VirtualSize;
            *SectionCharacteristics = SectionHeader->Characteristics;
            return ImageBase + SectionHeader->VirtualAddress;
        }
    }

    return NULL;
}

пн, 24 авг. 2020 г. в 12:34, Dmitry Mikushin <dmitry@kernelgen.org>: