Skip to content

Commit 2e6d5eb

Browse files
committed
Efficiently with vectors
1 parent c790fb2 commit 2e6d5eb

File tree

2 files changed

+54
-18
lines changed

2 files changed

+54
-18
lines changed

ghcide/ghcide.cabal

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ library
8787
unordered-containers >= 0.2.10.0,
8888
utf8-string,
8989
vector,
90+
vector-algorithms,
9091
hslogger,
9192
Diff ^>=0.4.0,
9293
vector,

ghcide/src/Text/Fuzzy/Parallel.hs

Lines changed: 53 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -8,41 +8,44 @@ module Text.Fuzzy.Parallel
88
match
99
) where
1010

11-
import Control.Parallel.Strategies (Eval, evalTraversable,
12-
parListChunk, rseq, using)
11+
import Control.Monad.ST (runST)
12+
import Control.Parallel.Strategies (Eval, Strategy, evalTraversable,
13+
parListChunk, parTraversable,
14+
rseq, using)
15+
import Data.Function (on)
1316
import Data.List (sortOn)
1417
import Data.Maybe (catMaybes)
1518
import Data.Monoid.Textual (TextualMonoid)
1619
import Data.Ord (Down (Down))
20+
import Data.Vector (Vector, (!))
21+
import qualified Data.Vector as V
22+
import qualified Data.Vector.Algorithms.Heap as VA
1723
import Prelude hiding (filter)
1824
import Text.Fuzzy (Fuzzy (..), match)
1925

20-
-- | Evaluation that forces the 'score' field
21-
forceScore :: TextualMonoid s => Fuzzy t s -> Eval(Fuzzy t s)
22-
forceScore it@Fuzzy{score} = do
23-
score' <- rseq score
24-
return it{score = score'}
25-
2626
-- | The function to filter a list of values by fuzzy search on the text extracted from them.
2727
--
28-
-- >>> filter "ML" [("Standard ML", 1990),("OCaml",1996),("Scala",2003)] "<" ">" fst False
29-
-- [Fuzzy {original = ("Standard ML",1990), rendered = "standard <m><l>", score = 4},Fuzzy {original = ("OCaml",1996), rendered = "oca<m><l>", score = 4}]
30-
{-# INLINABLE filter #-}
28+
-- >>> length $ filter 1000 200 "ML" (concat $ replicate 10000 [("Standard ML", 1990),("OCaml",1996),("Scala",2003)]) "<" ">" fst False
29+
-- 200
3130
filter :: (TextualMonoid s)
3231
=> Int -- ^ Chunk size. 1000 works well.
32+
-> Int -- ^ Max results
3333
-> s -- ^ Pattern.
3434
-> [t] -- ^ The list of values containing the text to search in.
3535
-> s -- ^ The text to add before each match.
3636
-> s -- ^ The text to add after each match.
3737
-> (t -> s) -- ^ The function to extract the text from the container.
3838
-> Bool -- ^ Case sensitivity.
3939
-> [Fuzzy t s] -- ^ The list of results, sorted, highest score first.
40-
filter chunkSize pattern ts pre post extract caseSen =
41-
sortOn (Down . score)
42-
(catMaybes
43-
(map (\t -> match pattern t pre post extract caseSen) ts
40+
filter chunkSize maxRes pattern ts pre post extract caseSen = runST $ do
41+
let v = (V.catMaybes
42+
(V.map (\t -> match pattern t pre post extract caseSen) (V.fromList ts)
4443
`using`
45-
parListChunk chunkSize (evalTraversable forceScore)))
44+
parVectorChunk chunkSize (evalTraversable forceScore)))
45+
v' <- V.unsafeThaw v
46+
VA.partialSortBy (compare `on` (Down . score)) v' maxRes
47+
v'' <- V.unsafeFreeze v'
48+
return $ take maxRes $ V.toList v''
4649

4750
-- | Return all elements of the list that have a fuzzy
4851
-- match against the pattern. Runs with default settings where
@@ -53,8 +56,40 @@ filter chunkSize pattern ts pre post extract caseSen =
5356
{-# INLINABLE simpleFilter #-}
5457
simpleFilter :: (TextualMonoid s)
5558
=> Int -- ^ Chunk size. 1000 works well.
59+
-> Int -- ^ Max results
5660
-> s -- ^ Pattern to look for.
5761
-> [s] -- ^ List of texts to check.
5862
-> [s] -- ^ The ones that match.
59-
simpleFilter chunk pattern xs =
60-
map original $ filter chunk pattern xs mempty mempty id False
63+
simpleFilter chunk maxRes pattern xs =
64+
map original $ filter chunk maxRes pattern xs mempty mempty id False
65+
66+
--------------------------------------------------------------------------------
67+
68+
-- | Divides a vector in chunks, applies the strategy in parallel to each chunk.
69+
parVectorChunk :: Int -> Strategy a -> Vector a -> Eval (Vector a)
70+
parVectorChunk chunkSize st v =
71+
V.concat <$> parTraversable (evalTraversable st) (chunkVector chunkSize v)
72+
73+
-- >>> chunkVector 3 (V.fromList [0..10])
74+
-- >>> chunkVector 3 (V.fromList [0..11])
75+
-- >>> chunkVector 3 (V.fromList [0..12])
76+
-- [[0,1,2],[3,4,5],[6,7,8],[9,10]]
77+
-- [[0,1,2],[3,4,5],[6,7,8],[9,10,11]]
78+
-- [[0,1,2],[3,4,5],[6,7,8],[9,10,11],[12]]
79+
chunkVector :: Int -> Vector a -> [Vector a]
80+
chunkVector chunkSize v = do
81+
let indices = pairwise $ [0, chunkSize .. l-1] ++ [l]
82+
l = V.length v
83+
[V.fromListN (h-l) [v ! j | j <- [l .. h-1]]
84+
| (l,h) <- indices]
85+
86+
pairwise :: [a] -> [(a,a)]
87+
pairwise [] = []
88+
pairwise [_] = []
89+
pairwise (x:y:xs) = (x,y) : pairwise (y:xs)
90+
91+
-- | Evaluation that forces the 'score' field
92+
forceScore :: TextualMonoid s => Fuzzy t s -> Eval(Fuzzy t s)
93+
forceScore it@Fuzzy{score} = do
94+
score' <- rseq score
95+
return it{score = score'}

0 commit comments

Comments
 (0)