Where's the optimiser gone? (part 2)

Hi @ll,

when called with "-O3 -target i386-win32", the compiler fails
to generate properly optimised code for two of the following
(rather trivial) functions, namely the signed variants; in their
listings the generated code is shown left of the "|" and the
code I expect right of it:
(see <https://godbolt.org/z/jQxIIi>)

Especially notice the difference between the signed and unsigned
variants: the latter are properly optimised!

regards
Stefan Kanthak

--- sample2.c ---

/* Signed widening multiply: x is cast to __int64 before the
   multiplication, so the product of the two long operands is
   computed and returned as a 64-bit value. */
__inline
__int64 __fastcall Int32x32To64(long x, long y)
{
    return (__int64) x * y;
}

/* Returns (a * b) / c, where the product is formed in 64 bits by
   Int32x32To64() and the 64-bit quotient is converted to the long
   return type. */
long Int32x32To64Div32(long a, long b, long c)
{
    return Int32x32To64(a, b) / c;
}

_Int32x32To64Div32: # @Int32x32To64Div32
    push esi
    mov eax, dword ptr [esp + 12]
    mov ecx, dword ptr [esp + 16] | mov eax, dword ptr [esp + 12]
    imul dword ptr [esp + 8] | cdq
    mov esi, ecx | push edx
    sar esi, 31 | push eax
    push esi | mov eax, dword ptr [esp + 16]
    push ecx | imul dword ptr [esp + 12]
    push edx | push edx
    push eax | push eax
    call __alldiv | jmp __alldiv
    pop esi
    ret

/* Returns (a * b) % c, where the product is formed in 64 bits by
   Int32x32To64() and the 64-bit remainder is converted to the long
   return type. */
long Int32x32To64Rem32(long a, long b, long c)
{
    return Int32x32To64(a, b) % c;
}

_Int32x32To64Rem32: # @Int32x32To64Rem32
    push esi
    mov eax, dword ptr [esp + 12]
    mov ecx, dword ptr [esp + 16] | mov eax, dword ptr [esp + 12]
    imul dword ptr [esp + 8] | cdq
    mov esi, ecx | push edx
    sar esi, 31 | push eax
    push esi | mov eax, dword ptr [esp + 16]
    push ecx | imul dword ptr [esp + 12]
    push edx | push edx
    push eax | push eax
    call __allrem | jmp __allrem
    pop esi
    ret

/* Unsigned widening multiply: x is cast to unsigned __int64 before
   the multiplication, so the product of the two unsigned long
   operands is computed and returned as a 64-bit value. */
__inline
unsigned __int64 __fastcall UInt32x32To64(unsigned long x, unsigned long y)
{
    return (unsigned __int64) x * y;
}

/* Unsigned counterpart of Int32x32To64Div32: returns (a * b) / c,
   where the product is formed in 64 bits by UInt32x32To64() and the
   64-bit quotient is converted to the unsigned long return type. */
unsigned long UInt32x32To64Div32(unsigned long a, unsigned long b, unsigned long c)
{
    return UInt32x32To64(a, b) / c;
}

_UInt32x32To64Div32: # @UInt32x32To64Div32
    mov eax, dword ptr [esp + 8]
    mul dword ptr [esp + 4]
    push 0
    push dword ptr [esp + 16]
    push edx
    push eax
    call __aulldiv
    ret

/* Unsigned counterpart of Int32x32To64Rem32: returns (a * b) % c,
   where the product is formed in 64 bits by UInt32x32To64() and the
   64-bit remainder is converted to the unsigned long return type. */
unsigned long UInt32x32To64Rem32(unsigned long a, unsigned long b, unsigned long c)
{
    return UInt32x32To64(a, b) % c;
}

_UInt32x32To64Rem32: # @UInt32x32To64Rem32
    mov eax, dword ptr [esp + 8]
    mul dword ptr [esp + 4]
    push 0
    push dword ptr [esp + 16]
    push edx
    push eax
    call __aullrem
    ret