Where's the optimiser gone (part 8): remainder calculated "by hand", not via call of __divmoddi4

/*
 * Reproducer 1: signed 64-bit division that yields both results.
 *
 * Stores numerator % denominator through `remainder` and returns
 * numerator / denominator. Both expressions use the same operands.
 * The modulo is written first and the division second; keep this exact
 * form, since the assembly quoted below was generated from it.
 */
long long quotient(long long numerator,
                   long long denominator,
                   long long *remainder)
{
    *remainder = numerator % denominator;
    return numerator / denominator;
}

/*
 * Reproducer 2: mirror image of the first reproducer.
 *
 * Stores numerator / denominator through `quotient` and returns
 * numerator % denominator. Here the division is written first and the
 * modulo second; keep this exact form, since the assembly quoted below
 * was generated from it.
 */
long long remainder(long long numerator,
                    long long denominator,
                    long long *quotient)
{
    *quotient = numerator / denominator;
    return numerator % denominator;
}

Compiled with "-O3 -target i386", this yields the following
BLOATED, unoptimised code (<https://godbolt.org/z/2rR2fu>):

JFTR: "-O3 -target i386-win32" also fails to generate a call
      of __alldvrm

# Compiler output for the C function quotient() (Intel syntax, i386).
# Stack arguments as read by this listing, matched against the C prototype:
#   [ebp + 8]  / [ebp + 12]  numerator   (low / high dword)
#   [ebp + 16] / [ebp + 20]  denominator (low / high dword)
#   [ebp + 24]               remainder   (pointer to the 64-bit result)
# Strategy: call __divdi3 for the quotient, then compute the remainder
# inline as numerator - quotient * denominator (low 64 bits only).
quotient: # @quotient
    # NOTE(review): the text after '|' on the next line is not compiler
    # output; it looks like the reporter's suggested replacement (a tail
    # jump to __divmoddi4) -- confirm against the original report.
    push ebp | jmp __divmoddi4
    mov ebp, esp
    # Save the registers this function overwrites; restored before ret.
    push ebx
    push edi
    push esi
    # Reserves a 4-byte stack slot at [ebp - 16]; the pushed value is unused.
    push eax
    mov edi, dword ptr [ebp + 20] # edi = denominator high
    mov ebx, dword ptr [ebp + 8] # ebx = numerator low
    mov esi, dword ptr [ebp + 16] # esi = denominator low
    # Push (numerator, denominator) for __divdi3: denominator first,
    # high dword before low dword.
    push edi
    push esi
    push dword ptr [ebp + 12]
    push ebx
    call __divdi3
    add esp, 16 # drop the four argument dwords
    # The quotient comes back in edx:eax; keep both halves for the return.
    mov ecx, eax # ecx = quotient low
    mov dword ptr [ebp - 16], edx # 4-byte Spill
    # 64-bit product quotient * denominator, truncated to 64 bits:
    #   low  dword = low32(qlo * dlo)
    #   high dword = high32(qlo * dlo) + qlo * dhi + qhi * dlo
    imul edi, eax # edi = denominator high * quotient low
    mul esi # edx:eax = quotient low * denominator low
    add edx, edi
    mov edi, dword ptr [ebp - 16] # 4-byte Reload
    imul esi, edi # esi = denominator low * quotient high
    add esi, edx # esi = high dword of the product
    # remainder = numerator - product, as a 64-bit sub/sbb pair. The mov
    # instructions between sub and sbb do not modify the borrow flag.
    mov edx, dword ptr [ebp + 12] # edx = numerator high
    sub ebx, eax
    mov eax, dword ptr [ebp + 24] # eax = remainder pointer
    sbb edx, esi
    # Store the remainder through the pointer argument.
    mov dword ptr [eax], ebx
    mov dword ptr [eax + 4], edx
    # Return the quotient in edx:eax.
    mov eax, ecx
    mov edx, edi
    add esp, 4 # release the spill slot
    pop esi
    pop edi
    pop ebx
    pop ebp
    ret

# Compiler output for the C function remainder() (Intel syntax, i386).
# Stack arguments as read by this listing, matched against the C prototype:
#   [ebp + 8]  / [ebp + 12]  numerator   (low / high dword)
#   [ebp + 16] / [ebp + 20]  denominator (low / high dword)
#   [ebp + 24]               quotient    (pointer to the 64-bit result)
# Strategy: call __divdi3 for the quotient, store it through the pointer,
# then compute the returned remainder inline as
# numerator - quotient * denominator (low 64 bits only).
remainder: # @remainder
    push ebp
    mov ebp, esp
    # Save the registers this function overwrites; restored before ret.
    push ebx
    push edi
    push esi
    mov esi, dword ptr [ebp + 20] # esi = denominator high
    mov ebx, dword ptr [ebp + 8] # ebx = numerator low
    mov edi, dword ptr [ebp + 12] # edi = numerator high
    # Push (numerator, denominator) for __divdi3: denominator first,
    # high dword before low dword.
    push esi
    push dword ptr [ebp + 16]
    push edi
    push ebx
    call __divdi3
    add esp, 16 # drop the four argument dwords
    # The quotient comes back in edx:eax.
    mov ecx, edx # ecx = quotient high
    mov edx, dword ptr [ebp + 24] # edx = quotient pointer
    imul esi, eax # esi = denominator high * quotient low
    # Store the quotient through the pointer argument.
    mov dword ptr [edx + 4], ecx
    mov dword ptr [edx], eax
    # 64-bit product quotient * denominator, truncated to 64 bits.
    mul dword ptr [ebp + 16] # edx:eax = quotient low * denominator low
    imul ecx, dword ptr [ebp + 16] # ecx = quotient high * denominator low
    add edx, esi
    add ecx, edx # ecx = high dword of the product
    # remainder = numerator - product, as a 64-bit sub/sbb pair.
    sub ebx, eax
    sbb edi, ecx
    # Return the remainder in edx:eax.
    mov eax, ebx
    mov edx, edi
    pop esi
    pop edi
    pop ebx
    pop ebp
    ret

Could you please file this as a bug report on https://bugs.llvm.org/?
"libraries" / "Scalar Optimizations" seems to be a good component for this kind of missed-optimization opportunity.

thanks,
adrian