Skip to content

Commit 3dd7d56

Browse files
committed
Change the ChebHash and MonteCarloHash APIs to accept a function-space similarity rather than a similarity for discrete vectors. This is a fix for issue #19.
1 parent 2547e29 commit 3dd7d56

File tree

4 files changed

+72
-32
lines changed

4 files changed

+72
-32
lines changed

src/function_hashing/chebhash.jl

Lines changed: 33 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,29 +11,50 @@ Typedefs
1111
========================#
1212

1313
# B = basis, which is a Symbol (e.g. :Chebyshev)
14-
struct ChebHash{B, H<:LSHFunction, I<:RealInterval}
14+
struct ChebHash{B, F<:SimilarityFunction, H<:LSHFunction, I<:RealInterval}
15+
# Discrete-space hash function used after extracting Chebyshev polynomial
16+
# coefficients from the input function.
1517
discrete_hashfn :: H
1618

1719
# Interval over which all input functions are defined.
1820
interval :: I
21+
22+
### Internal ChebHash constructors
23+
function ChebHash{B,F}(
24+
hashfn::H,
25+
interval::I
26+
) where {B, F<:SimilarityFunction, H<:LSHFunction, I<:RealInterval}
27+
28+
new{B,F,H,I}(hashfn, interval)
29+
end
1930
end
2031

2132
### External ChebHash constructors
22-
ChebHash{S}(hashfn::H, interval::I) where {S, H<:LSHFunction, I<:RealInterval} =
23-
ChebHash{S,H,I}(hashfn, interval)
24-
2533
ChebHash(similarity, args...; kws...) =
2634
ChebHash(SimilarityFunction(similarity), args...; kws...)
2735

28-
function ChebHash(::SimilarityFunction{S},
29-
args...;
30-
interval::RealInterval = @interval(-1 x 1),
31-
kws...) where S
32-
33-
discrete_hashfn = LSHFunction(S, args...; kws...)
34-
ChebHash{:Chebyshev}(discrete_hashfn, interval)
36+
for (discrete_sim, fn_sim) in zip([ℓ2, cossim], [L2, cossim])
37+
quote
38+
# Add an implementation of ChebHash that dispatches on the similarity
39+
# function fn_sim
40+
function ChebHash(sim::SimilarityFunction{$fn_sim},
41+
args...;
42+
interval::RealInterval = @interval(-1 x 1),
43+
kws...) where S
44+
45+
discrete_hashfn = LSHFunction($discrete_sim, args...; kws...)
46+
ChebHash{:Chebyshev,typeof(sim)}(discrete_hashfn, interval)
47+
end
48+
end |> eval
3549
end
3650

51+
# Implementation of ChebHash for invalid similarity functions. Just throws
52+
# an ErrorException. Necessary because otherwise the first external ChebHash
53+
# constructor will go into an infinite loop when it receives an invalid
54+
# similarity function.
55+
ChebHash(sim::SimilarityFunction, args...; kws...) =
56+
ErrorException("similarity must be ℓ2 or cossim") |> throw
57+
3758
#========================
3859
Helper functions for ChebHash
3960
========================#
@@ -76,8 +97,7 @@ LSHFunction API compliance
7697
n_hashes(hashfn::ChebHash) =
7798
n_hashes(hashfn.discrete_hashfn)
7899

79-
similarity(hashfn::ChebHash) =
80-
similarity(hashfn.discrete_hashfn)
100+
similarity(::ChebHash{T,SimilarityFunction{F}}) where {T,F} = F
81101

82102
hashtype(hashfn::ChebHash) =
83103
hashtype(hashfn.discrete_hashfn)

src/function_hashing/monte_carlo.jl

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@ MonteCarloHash for hashing function spaces.
88
Typedefs
99
========================#
1010

11-
struct MonteCarloHash{H <: LSHFunction, D, T, S} <: LSHFunction
11+
struct MonteCarloHash{F, H <: LSHFunction, D, T, S} <: LSHFunction
12+
similarity :: F
13+
1214
discrete_hashfn :: H
1315
μ :: D
1416

@@ -28,14 +30,18 @@ struct MonteCarloHash{H <: LSHFunction, D, T, S} <: LSHFunction
2830
sample_points :: Vector{S}
2931

3032
### Internal constructors
31-
function MonteCarloHash(discrete_hashfn::H, μ, volume, p, n_samples) where {H<:LSHFunction}
33+
function MonteCarloHash(similarity::F, discrete_hashfn::H, μ, volume,
34+
p, n_samples) where {F, H<:LSHFunction}
3235
sample_points = [μ() for ii = 1:n_samples]
3336

3437
T = eltype(μ())
3538
volume = T(volume)
3639
p = T(p)
3740

38-
new{H,typeof(μ),T,eltype(sample_points)}(discrete_hashfn, μ, volume, p, n_samples, sample_points)
41+
new{F,H,typeof(μ),T,eltype(sample_points)}(
42+
similarity, discrete_hashfn, μ, volume,
43+
p, n_samples, sample_points
44+
)
3945
end
4046
end
4147

@@ -45,15 +51,16 @@ end
4551
MonteCarloHash(similarity, args...; kws...) =
4652
MonteCarloHash(SimilarityFunction(similarity), args...; kws...)
4753

48-
for (simfn,p) in zip([ℓ1,ℓ2,cossim], [1,2,2])
54+
for (simfn, fn_space_simfn, p) in zip([ℓ1,ℓ2,cossim], [L1,L2,cossim], [1,2,2])
4955
quote
50-
# Create implementation of MonteCarloHash for current similarity function
51-
# and order of L^p space
52-
function MonteCarloHash(sim::SimilarityFunction{$simfn}, μ, args...;
56+
# Add dispatch for case in which we specify the similarity function
57+
# to be $fn_space_simfn
58+
function MonteCarloHash(sim::SimilarityFunction{$fn_space_simfn}, μ, args...;
5359
n_samples::Int64=1024, volume=1.0, kws...)
5460

5561
discrete_hashfn = LSHFunction($simfn, args...; kws...)
56-
MonteCarloHash(discrete_hashfn, μ, volume, $p, n_samples)
62+
MonteCarloHash($fn_space_simfn, discrete_hashfn, μ, volume,
63+
$p, n_samples)
5764
end
5865
end |> eval
5966
end
@@ -75,8 +82,7 @@ LSHFunction API compliance
7582
hashtype(hashfn::MonteCarloHash) =
7683
hashtype(hashfn.discrete_hashfn)
7784

78-
similarity(hashfn::MonteCarloHash) =
79-
similarity(hashfn.discrete_hashfn)
85+
similarity(hashfn::MonteCarloHash) = hashfn.similarity
8086

8187
n_hashes(hashfn::MonteCarloHash) =
8288
n_hashes(hashfn.discrete_hashfn)

test/function_hashing/test_chebhash.jl

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,26 @@ Tests
1919
@test n_hashes(hashfn) == 5
2020
@test hashtype(hashfn) == hashtype(LSHFunction(cossim))
2121

22-
# Hash L^2([-1,1]) over L^p distance
23-
hashfn = ChebHash(ℓ1)
22+
# Hash L^2([-1,1]) over L^2 distance
23+
hashfn = ChebHash(L2)
2424

2525
@test n_hashes(hashfn) == 1
26-
@test similarity(hashfn) == ℓ1
27-
@test hashtype(hashfn) == hashtype(LSHFunction(ℓ1))
26+
@test similarity(hashfn) == L2
27+
@test hashtype(hashfn) == hashtype(LSHFunction(ℓ2))
28+
end
29+
30+
@testset "Provide invalid similarity" begin
31+
# When we pass in a similarity that is not supported by ChebHash
32+
# we should receive an error.
33+
@test_throws(ErrorException, ChebHash((x,y) -> abs(x-y)))
34+
@test_throws(ErrorException, ChebHash(ℓ1))
35+
@test_throws(ErrorException, ChebHash(L1))
36+
@test_throws(ErrorException, ChebHash(ℓ2))
37+
38+
# Construct a hash function (with valid similarity) in the same
39+
# manner as we did above in case the ChebHash API ever changes.
40+
# This ensures that we won't forget to update these tests.
41+
_ = ChebHash(L2)
2842
end
2943

3044
#==========
@@ -97,7 +111,7 @@ Tests
97111
### Hash two functions with L^2 distance ≈ 0
98112
f(x) = 0.0
99113
# Small bump function: returns 1e-3 for x in the closed interval [-0.5, 0.5]
# and 0.0 everywhere else, so that its L^2 distance from f(x) = 0.0 is tiny.
g(x) = (-0.5 ≤ x ≤ 0.5) ? 1e-3 : 0.0
100-
hashfn = ChebHash(ℓ2, 1024)
114+
hashfn = ChebHash(L2, 1024)
101115

102116
@test embedded_similarity(hashfn, f, g) 1e-3
103117

@@ -115,7 +129,7 @@ Tests
115129

116130
@testset "Hash L^2 distance (nontrivial inputs)" begin
117131
interval = LSHFunctions.@interval(-1.0 x 1.0)
118-
hashfn = ChebHash(ℓ2, 1024; interval=interval)
132+
hashfn = ChebHash(L2, 1024; interval=interval)
119133

120134
trig_function_test() = begin
121135
f = ShiftedSine(π, π * rand())

test/function_hashing/test_monte_carlo.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@ Tests
2121
@test hashtype(hashfn) == hashtype(LSHFunction(cossim))
2222

2323
# Hash L^1([0,1]) over L^1 distance
24-
hashfn = MonteCarloHash(ℓ1, μ, 10)
24+
hashfn = MonteCarloHash(L1, μ, 10)
2525

2626
@test n_hashes(hashfn) == 10
27-
@test similarity(hashfn) == ℓ1
27+
@test similarity(hashfn) == L1
2828
@test hashtype(hashfn) == hashtype(LSHFunction(ℓ1))
2929
end
3030

@@ -75,7 +75,7 @@ Tests
7575
# you can map them into R^N isomorphically
7676
N = 4
7777
μ() = N * rand()
78-
hashfn = MonteCarloHash(ℓ1, μ, 1024; volume = N)
78+
hashfn = MonteCarloHash(L1, μ, 1024; volume = N)
7979

8080
@test let success = true, ii = 1
8181
while success && ii 128

0 commit comments

Comments
 (0)