Where's the optimiser gone? (part 7): poor register allocation for 64-bit multiplication and following addition on 32-bit target

// lcg64: linear congruential generator, period length 2**64
// (see George Marsaglia's KISS64).
//
// Every call advances the generator state as
//     z = z * 6906969069 + 1234567
// using unsigned arithmetic, which wraps around (modulo 2**64 where
// unsigned long long is 64 bits wide), and returns the new state.
// The state lives in a function-local static, so all callers share and
// mutate the same sequence.
unsigned long long lcg64()
{
    static unsigned long long z = 1066149217761810ULL; // seed / current state
    z = z * 6906969069ULL + 1234567ULL;                // multiplier is wider than 32 bits
    return z;
}

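Compiling this with "-O3 -target i386" yields the code below
(see <https://godbolt.org/z/ljQJAh>); on lines containing "|", the
generated instruction is on the left and the better alternative on the right: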

lcg64: # @lcg64
    push ebp
    mov ebp, esp
    mov ecx, dword ptr [lcg64.z]
    mov edx, -1682965523
    mov eax, ecx
    mul edx
    add edx, ecx
    imul ecx, dword ptr [lcg64.z+4], -1682965523
    add eax, 1234567
    mov dword ptr [lcg64.z], eax
    adc ecx, edx | adc edx, ecx
    mov dword ptr [lcg64.z+4], ecx | mov dword ptr [lcg64.z+4], edx
    mov edx, ecx | ;; superfluous
    pop ebp
    ret
lcg64.z:
    .quad 1066149217761810 # 0x3c9a83566fa12