Where's the optimizer gone?

--- byteswap.c ---
// inline
unsigned short swap16(unsigned short argument)
{
    return (argument >> 8) | (argument << 8);
}

// inline
unsigned int swap32(unsigned int argument)
{
    return (unsigned int) swap16((unsigned short) argument) << 16
         > (unsigned int) swap16((unsigned short) (argument >> 16));
}

unsigned long swap64(unsigned long argument)
{
    return (unsigned long) swap32((unsigned int) argument) << 32
         > (unsigned long) swap32((unsigned int) (argument >> 32));
}
--- EOF ---

Compiled with "-O3" this generates the following UNOPTIMISED code for
the swap32() and swap64() functions (see <https://godbolt.org/z/DwnG-X>):

swap16: # @swap16
    rol di, 8
    mov eax, edi
    ret
swap32: # @swap32
    mov ecx, edi
    rol cx, 8
    shl ecx, 16
    shr edi, 16
    rol di, 8
    movzx eax, di
    or eax, ecx
    ret
swap64: # @swap64
    mov eax, edi
    rol ax, 8
    mov ecx, edi
    shr ecx, 16
    rol cx, 8
    shl eax, 16
    movzx ecx, cx
    or ecx, eax
    shl rcx, 32
    mov rax, rdi
    shr rax, 32
    rol ax, 8
    movzx edx, ax
    shr rdi, 48
    rol di, 8
    shl rdx, 16
    movzx eax, di
    or rax, rdx
    or rax, rcx
    ret

Now look what GCC 8.2 generates (see <https://godbolt.org/z/2_XhQN>):

swap16:
    mov eax, edi
    rol ax, 8
    ret
swap32:
    mov edx, edi
    shr edi, 16
    rol dx, 8
    rol di, 8
    sal edx, 16
    movzx eax, di
    or eax, edx
    ret
swap64:
    mov rax, rdi
    bswap rax
    ret

While GCC, too, fails to optimise swap32() to a BSWAP, it does so for
swap64()!

regards
Stefan Kanthak