Where's the optimiser gone? (part 6): "rotation" idiom not recognised for 64-bit value on 32-bit target

/*
 * Rotate `value` left by `count` bit positions using the shift/or idiom:
 * the bits pushed out at the top by `value << count` are brought back in
 * at the bottom by `value >> (64 - count)`, and the two halves are OR-ed.
 *
 * NOTE(review): assuming unsigned long long is 64 bits wide, this is only
 * well-defined for 1 <= count <= 63.  With count == 0 the right shift
 * becomes a shift by 64, and with count >= 64 the left shift is out of
 * range (and 64 - count wraps around as unsigned); shifting by the type
 * width or more is undefined behaviour in C.
 */
unsigned long long rotate(unsigned long long value, unsigned int count)
{
    return (value << count) | (value >> (64 - count));
}

Compiled with "-O3 -m32", clang produces the unoptimised code shown in the
left column below (see <https://godbolt.org/z/OksNnO>); the right column
shows a far shorter sequence for comparison. OUCH!

# Side-by-side listing: two instruction sequences separated by '|'.
# Left column: per the text above, clang's output.  It saves ebp/ebx/edi/esi,
#   so the arguments are read from [esp + 20] (low half of value),
#   [esp + 24] (high half of value) and [esp + 28] (count).
# Right column: presumably a shorter hand-written alternative (NOTE(review):
#   confirm with the author).  It saves only ebx, so the same arguments are
#   read from [esp + 8], [esp + 12] and [esp + 16].
# Both columns test bit 5 of a shift count (test ..., 32) and use cmovne to
# cover counts of 32 or more, because the 32-bit shl/shr/shld/shrd
# instructions only use the low 5 bits of cl.
# Both columns leave the 64-bit result in edx:eax.
rotate: # @rotate
    push ebp |
    push ebx | push ebx
    push edi |
    push esi |
    mov eax, dword ptr [esp + 20] | mov eax, dword ptr [esp + 8]
    mov esi, dword ptr [esp + 24] | mov edx, dword ptr [esp + 12]
    mov ecx, dword ptr [esp + 28] | mov ecx, dword ptr [esp + 16]
    xor edx, edx | mov ebx, edx
    mov ebx, 64 |
    mov edi, eax |
    mov ebp, esi |
    shl edi, cl |
    shld ebp, eax, cl |
    test cl, 32 |
    cmovne ebp, edi |
    cmovne edi, edx |
    sub ebx, ecx |
    mov edx, esi |
    mov ecx, ebx |
    shr edx, cl |
    shrd eax, esi, cl |
    test bl, 32 | test cl, 32
    mov ecx, 0 | cmovne edx, eax
    cmovne eax, edx | cmovne eax, ebx
    cmovne edx, ecx | cmovne ebx, edx
    or eax, edi | shld edx, eax, cl
    or edx, ebp | shld eax, ebx, cl
    pop esi |
    pop edi |
    pop ebx | pop ebx
    pop ebp |
    ret | ret