Add internal top_set_bit function (#47523)

LilithHafner · Lilith Hafner · mcabbott · web-flow · commit 708d1bdb36e9 · 2023-01-08T17:16:48.000-05:00
* add top_set_bit

Co-authored-by: Lilith Hafner &lt;Lilith.Hafner@gmail.com&gt;
Co-authored-by: Michael Abbott &lt;32575566+mcabbott@users.noreply.github.com&gt;
diff --git a/base/abstractdict.jl b/base/abstractdict.jl
@@ -575,7 +575,7 @@ function convert(::Type{T}, x::AbstractDict) where T<:AbstractDict
 end
 
 # hashing objects by identity
-_tablesz(x::T) where T <: Integer = x < 16 ? T(16) : one(T)<<((sizeof(T)<<3)-leading_zeros(x-one(T)))
+_tablesz(x::T) where T <: Integer = x < 16 ? T(16) : one(T)<<(top_set_bit(x-one(T))) # T(16) for x < 16, else 1 << (bit length of x-1)
 
 TP{K,V} = Union{Type{Tuple{K,V}},Type{Pair{K,V}}}
 
diff --git a/base/bitarray.jl b/base/bitarray.jl
@@ -1545,12 +1545,12 @@ function unsafe_bitfindprev(Bc::Vector{UInt64}, start::Int)
 
     @inbounds begin
         if Bc[chunk_start] & mask != 0
-            return (chunk_start-1) << 6 + (64 - leading_zeros(Bc[chunk_start] & mask))
+            return (chunk_start-1) << 6 + (top_set_bit(Bc[chunk_start] & mask))
         end
 
         for i = (chunk_start-1):-1:1
             if Bc[i] != 0
-                return (i-1) << 6 + (64 - leading_zeros(Bc[i]))
+                return (i-1) << 6 + (top_set_bit(Bc[i]))
             end
         end
     end
diff --git a/base/float.jl b/base/float.jl
@@ -221,7 +221,7 @@ end
 
 function Float32(x::UInt128)
     x == 0 && return 0f0
-    n = 128-leading_zeros(x) # ndigits0z(x,2)
+    n = top_set_bit(x) # ndigits0z(x,2)
     if n <= 24
         y = ((x % UInt32) << (24-n)) & 0x007f_ffff
     else
@@ -237,7 +237,7 @@ function Float32(x::Int128)
     x == 0 && return 0f0
     s = ((x >>> 96) % UInt32) & 0x8000_0000 # sign bit
     x = abs(x) % UInt128
-    n = 128-leading_zeros(x) # ndigits0z(x,2)
+    n = top_set_bit(x) # ndigits0z(x,2)
     if n <= 24
         y = ((x % UInt32) << (24-n)) & 0x007f_ffff
     else
diff --git a/base/gmp.jl b/base/gmp.jl
@@ -10,7 +10,7 @@ import .Base: *, +, -, /, <, <<, >>, >>>, <=, ==, >, >=, ^, (~), (&), (|), xor,
              trailing_zeros, trailing_ones, count_ones, count_zeros, tryparse_internal,
              bin, oct, dec, hex, isequal, invmod, _prevpow2, _nextpow2, ndigits0zpb,
              widen, signed, unsafe_trunc, trunc, iszero, isone, big, flipsign, signbit,
-             sign, hastypemax, isodd, iseven, digits!, hash, hash_integer
+             sign, hastypemax, isodd, iseven, digits!, hash, hash_integer, top_set_bit
 
 if Clong == Int32
     const ClongMax = Union{Int8, Int16, Int32}
@@ -396,7 +396,7 @@ function Float64(x::BigInt, ::RoundingMode{:Nearest})
         z = Float64((unsafe_load(x.d, 2) % UInt64) << BITS_PER_LIMB + unsafe_load(x.d))
     else
         y1 = unsafe_load(x.d, xsize) % UInt64
-        n = 64 - leading_zeros(y1)
+        n = top_set_bit(y1)
         # load first 54(1 + 52 bits of fraction + 1 for rounding)
         y = y1 >> (n - (precision(Float64)+1))
         if Limb == UInt64
@@ -586,6 +586,12 @@ Number of ones in the binary representation of abs(x).
 """
 count_ones_abs(x::BigInt) = iszero(x) ? 0 : MPZ.mpn_popcount(x)
 
+function top_set_bit(x::BigInt)
+    signbit(x) && throw(DomainError(x, "top_set_bit only supports negative arguments when they have type BitSigned."))
+    iszero(x) && return 0 # mpz_sizeinbase reports 1 for a zero operand, so answer 0 here
+    return Int(ccall((:__gmpz_sizeinbase, :libgmp), Csize_t, (Base.GMP.MPZ.mpz_t, Cint), x, 2)) # digit count in base 2
+end
+
 divrem(x::BigInt, y::BigInt) = MPZ.tdiv_qr(x, y)
 divrem(x::BigInt, y::Integer) = MPZ.tdiv_qr(x, big(y))
 
diff --git a/base/int.jl b/base/int.jl
@@ -482,6 +482,32 @@ julia> trailing_ones(3)
 """
 trailing_ones(x::Integer) = trailing_zeros(~x)
 
+"""
+    top_set_bit(x::Integer) -> Integer
+
+The number of bits in `x`'s binary representation, excluding leading zeros.
+
+Equivalently, the position of the most significant set bit in `x`'s binary
+representation, measured from the least significant side.
+
+Negative `x` are only supported when `x::BitSigned`.
+
+See also: [`ndigits0z`](@ref), [`ndigits`](@ref).
+
+# Examples
+```jldoctest
+julia> Base.top_set_bit(4)
+3
+
+julia> Base.top_set_bit(0)
+0
+
+julia> Base.top_set_bit(Int64(-1))
+64
+```
+"""
+top_set_bit(x::BitInteger) = 8sizeof(x) - leading_zeros(x)
+
 ## integer comparisons ##
 
 (< )(x::T, y::T) where {T<:BitUnsigned} = ult_int(x, y)
diff --git a/base/intfuncs.jl b/base/intfuncs.jl
@@ -391,9 +391,9 @@ end
 # optimization: promote the modulus m to BigInt only once (cf. widemul in generic powermod above)
 powermod(x::Integer, p::Integer, m::Union{Int128,UInt128}) = oftype(m, powermod(x, p, big(m)))
 
-_nextpow2(x::Unsigned) = oneunit(x)<<((sizeof(x)<<3)-leading_zeros(x-oneunit(x)))
+_nextpow2(x::Unsigned) = oneunit(x)<<(top_set_bit(x-oneunit(x))) # 1 << (bit length of x-1)
 _nextpow2(x::Integer) = reinterpret(typeof(x),x < 0 ? -_nextpow2(unsigned(-x)) : _nextpow2(unsigned(x)))
-_prevpow2(x::Unsigned) = one(x) << unsigned((sizeof(x)<<3)-leading_zeros(x)-1)
+_prevpow2(x::Unsigned) = one(x) << unsigned(top_set_bit(x)-1) # 1 << (zero-based index of x's highest set bit)
 _prevpow2(x::Integer) = reinterpret(typeof(x),x < 0 ? -_prevpow2(unsigned(-x)) : _prevpow2(unsigned(x)))
 
 """
@@ -526,7 +526,7 @@ const powers_of_ten = [
     0x002386f26fc10000, 0x016345785d8a0000, 0x0de0b6b3a7640000, 0x8ac7230489e80000,
 ]
 function bit_ndigits0z(x::Base.BitUnsigned64)
-    lz = (sizeof(x)<<3)-leading_zeros(x)
+    lz = top_set_bit(x) # bit length of x (despite the name, not a leading-zero count)
     nd = (1233*lz)>>12+1
     nd -= x < powers_of_ten[nd]
 end
@@ -571,12 +571,12 @@ function ndigits0zpb(x::Integer, b::Integer)
     x = abs(x)
     if x isa Base.BitInteger
         x = unsigned(x)::Unsigned
-        b == 2  && return sizeof(x)<<3 - leading_zeros(x)
-        b == 8  && return (sizeof(x)<<3 - leading_zeros(x) + 2) ÷ 3
+        b == 2  && return top_set_bit(x)
+        b == 8  && return (top_set_bit(x) + 2) ÷ 3
         b == 16 && return sizeof(x)<<1 - leading_zeros(x)>>2
         b == 10 && return bit_ndigits0z(x)
         if ispow2(b)
-            dv, rm = divrem(sizeof(x)<<3 - leading_zeros(x), trailing_zeros(b))
+            dv, rm = divrem(top_set_bit(x), trailing_zeros(b))
             return iszero(rm) ? dv : dv + 1
         end
     end
@@ -638,6 +638,9 @@ function ndigits0z(x::Integer, b::Integer)
     end
 end
 
+# Extends the definition in base/int.jl: fallback for Integer types with no more specific method
+top_set_bit(x::Integer) = ceil(Integer, log2(x + oneunit(x)))
+
 """
     ndigits(n::Integer; base::Integer=10, pad::Integer=1)
 
@@ -673,7 +676,7 @@ ndigits(x::Integer; base::Integer=10, pad::Integer=1) = max(pad, ndigits0z(x, ba
 ## integer to string functions ##
 
 function bin(x::Unsigned, pad::Int, neg::Bool)
-    m = 8 * sizeof(x) - leading_zeros(x)
+    m = top_set_bit(x)
     n = neg + max(pad, m)
     a = StringVector(n)
     # for i in 0x0:UInt(n-1) # automatic vectorization produces redundant codes
@@ -700,7 +703,7 @@ function bin(x::Unsigned, pad::Int, neg::Bool)
 end
 
 function oct(x::Unsigned, pad::Int, neg::Bool)
-    m = div(8 * sizeof(x) - leading_zeros(x) + 2, 3)
+    m = div(top_set_bit(x) + 2, 3)
     n = neg + max(pad, m)
     a = StringVector(n)
     i = n
diff --git a/base/sort.jl b/base/sort.jl
@@ -5,7 +5,7 @@ module Sort
 using Base.Order
 
 using Base: copymutable, midpoint, require_one_based_indexing, uinttype,
-    sub_with_overflow, add_with_overflow, OneTo, BitSigned, BitIntegerType
+    sub_with_overflow, add_with_overflow, OneTo, BitSigned, BitIntegerType, top_set_bit
 
 import Base:
     sort,
diff --git a/base/special/rem_pio2.jl b/base/special/rem_pio2.jl
@@ -109,7 +109,7 @@ function fromfraction(f::Int128)
     # 1. get leading term truncated to 26 bits
     s = ((f < 0) % UInt64) << 63     # sign bit
     x = abs(f) % UInt128             # magnitude
-    n1 = 128-leading_zeros(x)         # ndigits0z(x,2)
+    n1 = Base.top_set_bit(x)          # ndigits0z(x,2)
     m1 = ((x >> (n1-26)) % UInt64) << 27
     d1 = ((n1-128+1021) % UInt64) << 52
     z1 = reinterpret(Float64, s | (d1 + m1))
@@ -119,7 +119,7 @@ function fromfraction(f::Int128)
     if x2 == 0
         return (z1, 0.0)
     end
-    n2 = 128-leading_zeros(x2)
+    n2 = Base.top_set_bit(x2)
     m2 = (x2 >> (n2-53)) % UInt64
     d2 = ((n2-128+1021) % UInt64) << 52
     z2 = reinterpret(Float64,  s | (d2 + m2))
diff --git a/base/twiceprecision.jl b/base/twiceprecision.jl
@@ -254,7 +254,7 @@ nbitslen(::Type{T}, len, offset) where {T<:IEEEFloat} =
     min(cld(precision(T), 2), nbitslen(len, offset))
 # The +1 here is for safety, because the precision of the significand
 # is 1 bit higher than the number that are explicitly stored.
-nbitslen(len, offset) = len < 2 ? 0 : ceil(Int, log2(max(offset-1, len-offset))) + 1
+nbitslen(len, offset) = len < 2 ? 0 : top_set_bit(max(offset-1, len-offset) - 1) + 1 # top_set_bit(m-1) == ceil(log2(m)) for m >= 1
 
 eltype(::Type{TwicePrecision{T}}) where {T} = T
 
@@ -310,7 +310,7 @@ function *(x::TwicePrecision, v::Number)
 end
 function *(x::TwicePrecision{<:IEEEFloat}, v::Integer)
     v == 0 && return TwicePrecision(x.hi*v, x.lo*v)
-    nb = ceil(Int, log2(abs(v)))
+    nb = top_set_bit(abs(v)-1) # integer form of ceil(log2(abs(v))); v == 0 was handled above
     u = truncbits(x.hi, nb)
     TwicePrecision(canonicalize2(u*v, ((x.hi-u) + x.lo)*v)...)
 end
diff --git a/test/intfuncs.jl b/test/intfuncs.jl
@@ -441,12 +441,42 @@ end
     end
 end
 
-@testset "leading_ones and count_zeros" begin
+@testset "leading_ones, count_zeros, etc." begin
     @test leading_ones(UInt32(Int64(2) ^ 32 - 2)) == 31
     @test leading_ones(1) == 0
     @test leading_zeros(Int32(1)) == 31
     @test leading_zeros(UInt32(Int64(2) ^ 32 - 2)) == 0
 
+    @test Base.top_set_bit(3) == 2
+    @test Base.top_set_bit(-Int64(17)) == 64 # negative BitSigned: no leading zeros, so every bit counts
+    @test Base.top_set_bit(big(15)) != Base.top_set_bit(big(16)) == Base.top_set_bit(big(17)) == 5
+    @test_throws DomainError Base.top_set_bit(big(-17))
+
+    # Minimal custom Integer subtype used to reach the generic (non-BitInteger, non-BigInt) method.
+    struct MyInt <: Integer
+        x::Int
+    end
+    MyInt(x::MyInt) = x
+    Base.:+(a::MyInt, b::MyInt) = a.x + b.x # returns a plain Int, not a MyInt
+
+    for n in 0:100
+        x = ceil(Int, log2(n + 1)) # reference value: bit length of n
+        @test x == Base.top_set_bit(Int128(n)) == Base.top_set_bit(unsigned(Int128(n)))
+        @test x == Base.top_set_bit(Int32(n)) == Base.top_set_bit(unsigned(Int64(n)))
+        @test x == Base.top_set_bit(Int8(n)) == Base.top_set_bit(unsigned(Int8(n)))
+        @test x == Base.top_set_bit(big(n))   # BigInt method (base/gmp.jl)
+        @test x == Base.top_set_bit(MyInt(n)) # generic Integer fallback (base/intfuncs.jl)
+    end
+
+    for n in -10:-1
+        @test 128 == Base.top_set_bit(Int128(n)) == Base.top_set_bit(unsigned(Int128(n)))
+        @test 32  == Base.top_set_bit(Int32(n)) == Base.top_set_bit(unsigned(Int32(n)))
+        @test 8   == Base.top_set_bit(Int8(n)) == Base.top_set_bit(unsigned(Int8(n)))
+        @test_throws DomainError Base.top_set_bit(big(n))
+        # This error message should never be exposed to the end user anyway.
+        err = n == -1 ? InexactError : DomainError # n == -1 makes the fallback's x + oneunit(x) equal 0
+        @test_throws err Base.top_set_bit(MyInt(n))
+    end
+
     @test count_zeros(Int64(1)) == 63
 end