@@ -8,41 +8,44 @@ module Text.Fuzzy.Parallel
8
8
match
9
9
) where
10
10
11
- import Control.Parallel.Strategies (Eval , evalTraversable ,
12
- parListChunk , rseq , using )
11
+ import Control.Monad.ST (runST )
12
+ import Control.Parallel.Strategies (Eval , Strategy , evalTraversable ,
13
+ parListChunk , parTraversable ,
14
+ rseq , using )
15
+ import Data.Function (on )
13
16
import Data.List (sortOn )
14
17
import Data.Maybe (catMaybes )
15
18
import Data.Monoid.Textual (TextualMonoid )
16
19
import Data.Ord (Down (Down ))
20
+ import Data.Vector (Vector , (!) )
21
+ import qualified Data.Vector as V
22
+ import qualified Data.Vector.Algorithms.Heap as VA
17
23
import Prelude hiding (filter )
18
24
import Text.Fuzzy (Fuzzy (.. ), match )
19
25
20
- -- | Evaluation that forces the 'score' field
21
- forceScore :: TextualMonoid s => Fuzzy t s -> Eval (Fuzzy t s )
22
- forceScore it@ Fuzzy {score} = do
23
- score' <- rseq score
24
- return it{score = score'}
25
-
26
26
-- | The function to filter a list of values by fuzzy search on the text extracted from them.
27
27
--
28
- -- >>> filter "ML" [("Standard ML", 1990),("OCaml",1996),("Scala",2003)] "<" ">" fst False
29
- -- [Fuzzy {original = ("Standard ML",1990), rendered = "standard <m><l>", score = 4},Fuzzy {original = ("OCaml",1996), rendered = "oca<m><l>", score = 4}]
30
- {-# INLINABLE filter #-}
28
+ -- >>> length $ filter 1000 200 "ML" (concat $ replicate 10000 [("Standard ML", 1990),("OCaml",1996),("Scala",2003)]) "<" ">" fst False
29
+ -- 200
31
30
filter :: (TextualMonoid s )
32
31
=> Int -- ^ Chunk size. 1000 works well.
32
+ -> Int -- ^ Max results
33
33
-> s -- ^ Pattern.
34
34
-> [t ] -- ^ The list of values containing the text to search in.
35
35
-> s -- ^ The text to add before each match.
36
36
-> s -- ^ The text to add after each match.
37
37
-> (t -> s ) -- ^ The function to extract the text from the container.
38
38
-> Bool -- ^ Case sensitivity.
39
39
-> [Fuzzy t s ] -- ^ The list of results, sorted, highest score first.
40
- filter chunkSize pattern ts pre post extract caseSen =
41
- sortOn (Down . score)
42
- (catMaybes
43
- (map (\ t -> match pattern t pre post extract caseSen) ts
40
+ filter chunkSize maxRes pattern ts pre post extract caseSen = runST $ do
41
+ let v = (V. catMaybes
42
+ (V. map (\ t -> match pattern t pre post extract caseSen) (V. fromList ts)
44
43
`using`
45
- parListChunk chunkSize (evalTraversable forceScore)))
44
+ parVectorChunk chunkSize (evalTraversable forceScore)))
45
+ v' <- V. unsafeThaw v
46
+ VA. partialSortBy (compare `on` (Down . score)) v' maxRes
47
+ v'' <- V. unsafeFreeze v'
48
+ return $ take maxRes $ V. toList v''
46
49
47
50
-- | Return all elements of the list that have a fuzzy
48
51
-- match against the pattern. Runs with default settings where
@@ -53,8 +56,40 @@ filter chunkSize pattern ts pre post extract caseSen =
53
56
{-# INLINABLE simpleFilter #-}
54
57
simpleFilter :: (TextualMonoid s )
55
58
=> Int -- ^ Chunk size. 1000 works well.
59
+ -> Int -- ^ Max results
56
60
-> s -- ^ Pattern to look for.
57
61
-> [s ] -- ^ List of texts to check.
58
62
-> [s ] -- ^ The ones that match.
59
- simpleFilter chunk pattern xs =
60
- map original $ filter chunk pattern xs mempty mempty id False
63
+ simpleFilter chunk maxRes pattern xs =
64
+ map original $ filter chunk maxRes pattern xs mempty mempty id False
65
+
66
+ --------------------------------------------------------------------------------
67
+
68
+ -- | Divides a vector in chunks, applies the strategy in parallel to each chunk.
69
+ parVectorChunk :: Int -> Strategy a -> Vector a -> Eval (Vector a )
70
+ parVectorChunk chunkSize st v =
71
+ V. concat <$> parTraversable (evalTraversable st) (chunkVector chunkSize v)
72
+
73
+ -- >>> chunkVector 3 (V.fromList [0..10])
74
+ -- >>> chunkVector 3 (V.fromList [0..11])
75
+ -- >>> chunkVector 3 (V.fromList [0..12])
76
+ -- [[0,1,2],[3,4,5],[6,7,8],[9,10]]
77
+ -- [[0,1,2],[3,4,5],[6,7,8],[9,10,11]]
78
+ -- [[0,1,2],[3,4,5],[6,7,8],[9,10,11],[12]]
79
+ chunkVector :: Int -> Vector a -> [Vector a ]
80
+ chunkVector chunkSize v = do
81
+ let indices = pairwise $ [0 , chunkSize .. l- 1 ] ++ [l]
82
+ l = V. length v
83
+ [V. fromListN (h- l) [v ! j | j <- [l .. h- 1 ]]
84
+ | (l,h) <- indices]
85
+
86
+ pairwise :: [a ] -> [(a ,a )]
87
+ pairwise [] = []
88
+ pairwise [_] = []
89
+ pairwise (x: y: xs) = (x,y) : pairwise (y: xs)
90
+
91
+ -- | Evaluation that forces the 'score' field
92
+ forceScore :: TextualMonoid s => Fuzzy t s -> Eval (Fuzzy t s )
93
+ forceScore it@ Fuzzy {score} = do
94
+ score' <- rseq score
95
+ return it{score = score'}
0 commit comments