Zulip Chat Archive

Stream: general

Topic: slowness of `log2` for fixed integers


Schrodinger ZHU Yifan (Oct 21 2023 at 22:20):

GCC/Clang is unable to identify log2 as bit scanning:

uint16_t lean_uint16_log2(uint16_t a) {
    size_t res = 0;
    while (a >= 2) {
        res++;
        a /= 2;
    }
    return res;
}
uint16_t lean_uint16_log2_opt(uint16_t a) {
    // GCC optimizes this to bsr/tzcnt
    // clang cannot
    uint16_t res = 0;
    while (a) {
        res++;
        a >>= 1;
    }
    return res ? res - 1 : 0;
}
uint16_t lean_uint16_log2_opt2(uint16_t a) {
    // both work
    if(!a) return 0;
    return 15 ^ __builtin_clzs(a);
}

Generates:

lean_uint16_log2(unsigned short):                  # @lean_uint16_log2(unsigned short)
        xor     eax, eax
        cmp     di, 2
        jb      .LBB0_2
.LBB0_1:                                # =>This Inner Loop Header: Depth=1
        movzx   ecx, di
        inc     eax
        mov     edi, ecx
        shr     edi
        cmp     cx, 3
        ja      .LBB0_1
.LBB0_2:
        ret
lean_uint16_log2_opt(unsigned short):              # @lean_uint16_log2_opt(unsigned short)
        xor     ecx, ecx
        xor     eax, eax
        test    edi, edi
        je      .LBB1_2
.LBB1_1:                                # =>This Inner Loop Header: Depth=1
        movzx   edx, di
        inc     eax
        mov     edi, edx
        shr     edi
        cmp     dx, 1
        ja      .LBB1_1
.LBB1_2:
        sub     ax, 1
        cmovb   eax, ecx
        ret
lean_uint16_log2_opt2(unsigned short):             # @lean_uint16_log2_opt2(unsigned short)
        lzcnt   ax, di
        xor     eax, 15
        test    edi, edi
        cmove   eax, edi
        ret

Schrodinger ZHU Yifan (Oct 21 2023 at 22:20):

https://godbolt.org/z/WE9zs8aGr


Last updated: Dec 20 2023 at 11:08 UTC