diff --git a/Data/Containers/ListUtils.hs b/Data/Containers/ListUtils.hs new file mode 100644 index 000000000..f8885f313 --- /dev/null +++ b/Data/Containers/ListUtils.hs @@ -0,0 +1,65 @@ +{-# LANGUAGE CPP #-} +#if !defined(TESTING) && __GLASGOW_HASKELL__ >= 703 +{-# LANGUAGE Safe #-} +#endif + +----------------------------------------------------------------------------- +-- | +-- Module : Data.Containers.ListUtils +-- Copyright : (c) Gershom Bazerman 2018 +-- License : BSD-style +-- Maintainer : libraries@haskell.org +-- Portability : portable +-- +-- This module provides efficient containers-based functions on the list type. +----------------------------------------------------------------------------- + +module Data.Containers.ListUtils ( + nubOrd, + nubOrdOn, + nubInt, + nubIntOn + ) where + +import qualified Data.Set as Set +import qualified Data.IntSet as IntSet + +-- | /O(n log n)/. The 'nubOrd' function removes duplicate elements from a list. +-- In particular, it keeps only the first occurrence of each element. By using a 'Set' internally +-- it has better asymptotics than the standard 'nub' function. +nubOrd :: (Ord a) => [a] -> [a] +nubOrd = go Set.empty + where + go _ [] = [] + go s (x:xs) = if x `Set.member` s then go s xs + else x : go (Set.insert x s) xs + +-- | The `nubOrdOn` function behaves just like `nubOrd` except it performs comparisons not on the +-- original datatype, but a user-specified projection from that datatype. +nubOrdOn :: (Ord b) => (a -> b) -> [a] -> [a] +nubOrdOn f = go Set.empty + where + go _ [] = [] + go s (x:xs) = let fx = f x + in if fx `Set.member` s then go s xs + else x : go (Set.insert fx s) xs + +-- | /O(n min(n,W))/. The 'nubInt' function removes duplicate elements from a list. +-- In particular, it keeps only the first occurrence of each element. By using an 'IntSet' internally +-- it has better asymptotics than the standard 'nub' function. +nubInt :: [Int] -> [Int] +nubInt = go IntSet.empty + where + go _ [] = [] + go s (x:xs) = if x `IntSet.member` s then go s xs + else x : go (IntSet.insert x s) xs + +-- | The `nubIntOn` function behaves just like 'nubInt' except it performs comparisons not on the +-- original datatype, but a user-specified projection from that datatype to 'Int'. +nubIntOn :: (a -> Int) -> [a] -> [a] +nubIntOn f = go IntSet.empty + where + go _ [] = [] + go s (x:xs) = let fx = f x + in if fx `IntSet.member` s then go s xs + else x : go (IntSet.insert fx s) xs diff --git a/containers.cabal b/containers.cabal index ec6b6ee5f..a4f281544 100644 --- a/containers.cabal +++ b/containers.cabal @@ -49,6 +49,7 @@ Library other-extensions: CPP, BangPatterns exposed-modules: + Data.Containers.ListUtils Data.IntMap Data.IntMap.Lazy Data.IntMap.Strict @@ -618,3 +619,22 @@ test-suite intset-strictness-properties ghc-options: -Wall include-dirs: include + +test-suite listutils-properties + hs-source-dirs: tests, . + main-is: listutils-properties.hs + other-modules: + Data.Containers.ListUtils + type: exitcode-stdio-1.0 + + build-depends: + base >= 4.3 && < 5, + ChasingBottoms, + deepseq >= 1.2 && < 1.5, + QuickCheck >= 2.7.1, + ghc-prim, + test-framework >= 0.3.3, + test-framework-quickcheck2 >= 0.2.9 + + ghc-options: -Wall + include-dirs: include diff --git a/tests/listutils-properties.hs b/tests/listutils-properties.hs new file mode 100644 index 000000000..658c2d54e --- /dev/null +++ b/tests/listutils-properties.hs @@ -0,0 +1,27 @@ +module Main where + +import Data.List (nub, nubBy) +import Data.Containers.ListUtils +import Test.Framework +import Test.Framework.Providers.QuickCheck2 + +main :: IO () +main = defaultMain + [ testProperty "nubOrd" prop_nubOrd + , testProperty "nubOrdOn" prop_nubOrdOn + , testProperty "nubInt" prop_nubInt + , testProperty "nubIntOn" prop_nubIntOn + ] + + +prop_nubOrd :: [Int] -> Bool +prop_nubOrd xs = nubOrd xs == nub xs + +prop_nubInt :: [Int] -> Bool +prop_nubInt xs = nubInt xs == nub xs + +prop_nubOrdOn :: [(Int,Int)] -> Bool +prop_nubOrdOn xs = nubOrdOn snd xs == nubBy (\x y -> snd x == snd y) xs + +prop_nubIntOn :: [(Int,Int)] -> Bool +prop_nubIntOn xs = nubIntOn snd xs == nubBy (\x y -> snd x == snd y) xs