|
| 1 | +-- | Parallel versions of 'filter' and 'simpleFilter' |
| 2 | +module Text.Fuzzy.Parallel |
| 3 | +( |
| 4 | + filter, |
| 5 | + simpleFilter, |
| 6 | + -- reexports |
| 7 | + Fuzzy(..), |
| 8 | + match |
| 9 | +) where |
| 10 | + |
| 11 | +import Control.Parallel.Strategies (Eval, evalTraversable, |
| 12 | + parListChunk, rseq, using) |
| 13 | +import Data.List (sortOn) |
| 14 | +import Data.Maybe (catMaybes) |
| 15 | +import Data.Monoid.Textual (TextualMonoid) |
| 16 | +import Data.Ord (Down (Down)) |
| 17 | +import Prelude hiding (filter) |
| 18 | +import Text.Fuzzy (Fuzzy (..), match) |
| 19 | + |
-- | Build an 'Eval' action that evaluates the 'score' field of a match
-- with 'rseq' and returns the same record carrying the evaluated score.
-- The remaining fields are passed through untouched.
forceScore :: TextualMonoid s => Fuzzy t s -> Eval (Fuzzy t s)
forceScore result =
    -- Read the field through its selector rather than a record pun, so no
    -- language extension is needed to compile this helper.
    rseq (score result) >>= \forced ->
        pure (result{score = forced})
| 25 | + |
-- | Filter a list of values by fuzzy search on the text extracted from them.
--
-- Every element of the input is run through 'match'; the resulting list is
-- evaluated with a 'parListChunk' strategy that forces the 'score' of each
-- successful match. Non-matching elements are dropped and the survivors are
-- returned ordered by descending 'score'.
--
-- Chunk sizes below 1 are treated as 1.
--
-- >>> filter 1000 "ML" [("Standard ML", 1990),("OCaml",1996),("Scala",2003)] "<" ">" fst False
-- [Fuzzy {original = ("Standard ML",1990), rendered = "standard <m><l>", score = 4},Fuzzy {original = ("OCaml",1996), rendered = "oca<m><l>", score = 4}]
{-# INLINABLE filter #-}
filter :: (TextualMonoid s)
       => Int        -- ^ Chunk size. 1000 works well.
       -> s          -- ^ Pattern.
       -> [t]        -- ^ The list of values containing the text to search in.
       -> s          -- ^ The text to add before each match.
       -> s          -- ^ The text to add after each match.
       -> (t -> s)   -- ^ The function to extract the text from the container.
       -> Bool       -- ^ Case sensitivity.
       -> [Fuzzy t s] -- ^ The list of results, sorted, highest score first.
filter chunkSize pattern ts pre post extract caseSen =
    sortOn (Down . score) (catMaybes candidates)
  where
    -- A non-positive chunk size is meaningless; make sure the strategy
    -- always receives at least 1.
    chunkSize' = max 1 chunkSize

    -- One fuzzy-match attempt per input element ('Nothing' when the
    -- pattern does not match).
    attempt t = match pattern t pre post extract caseSen

    -- The strategy is applied to the list of 'Maybe' results, before the
    -- 'Nothing's are discarded, so chunks are cut from the full input.
    candidates =
        map attempt ts
            `using` parListChunk chunkSize' (evalTraversable forceScore)
| 46 | + |
-- | Return all elements of the list that have a fuzzy match against the
-- pattern.
--
-- This is 'filter' run with default settings: nothing is added around the
-- matched characters ('mempty' before and after), the text is the element
-- itself ('id'), and matching is case-insensitive. Only the 'original'
-- values of the matches are returned, in the order 'filter' produces them.
--
-- >>> simpleFilter 1000 "vm" ["vim", "emacs", "virtual machine"]
-- ["vim","virtual machine"]
{-# INLINABLE simpleFilter #-}
simpleFilter :: (TextualMonoid s)
             => Int -- ^ Chunk size. 1000 works well.
             -> s   -- ^ Pattern to look for.
             -> [s] -- ^ List of texts to check.
             -> [s] -- ^ The ones that match.
simpleFilter chunk pattern xs =
    original <$> filter chunk pattern xs mempty mempty id False
0 commit comments