Zulip Chat Archive
Stream: general
Topic: slowness of `log2` for fixed integers
Schrodinger ZHU Yifan (Oct 21 2023 at 22:20):
GCC/Clang is unable to identify log2
as bit scanning:
uint16_t lean_uint16_log2(uint16_t a) {
size_t res = 0;
while (a >= 2) {
res++;
a /= 2;
}
return res;
}
uint16_t lean_uint16_log2_opt(uint16_t a) {
// GCC optimizes this to bsr/tzcnt
// clang cannot
uint16_t res = 0;
while (a) {
res++;
a >>= 1;
}
return res ? res - 1 : 0;
}
uint16_t lean_uint16_log2_opt2(uint16_t a) {
// both work
if(!a) return 0;
return 15 ^ __builtin_clzs(a);
}
Generates:
lean_uint16_log2(unsigned short): # @lean_uint16_log2(unsigned short)
xor eax, eax
cmp di, 2
jb .LBB0_2
.LBB0_1: # =>This Inner Loop Header: Depth=1
movzx ecx, di
inc eax
mov edi, ecx
shr edi
cmp cx, 3
ja .LBB0_1
.LBB0_2:
ret
lean_uint16_log2_opt(unsigned short): # @lean_uint16_log2_opt(unsigned short)
xor ecx, ecx
xor eax, eax
test edi, edi
je .LBB1_2
.LBB1_1: # =>This Inner Loop Header: Depth=1
movzx edx, di
inc eax
mov edi, edx
shr edi
cmp dx, 1
ja .LBB1_1
.LBB1_2:
sub ax, 1
cmovb eax, ecx
ret
lean_uint16_log2_opt2(unsigned short): # @lean_uint16_log2_opt2(unsigned short)
lzcnt ax, di
xor eax, 15
test edi, edi
cmove eax, edi
ret
Schrodinger ZHU Yifan (Oct 21 2023 at 22:20):
https://godbolt.org/z/WE9zs8aGr
Last updated: Dec 20 2023 at 11:08 UTC