From 80e245c62f06c21783fa779905cba1ff954d2aff Mon Sep 17 00:00:00 2001 From: Gershom Date: Thu, 25 Jan 2018 16:51:50 -0500 Subject: [PATCH 1/2] add ordnub and friends --- Data/Containers/ListUtils.hs | 65 ++++++++++++++++++++++++++++++++++++ containers.cabal | 1 + 2 files changed, 66 insertions(+) create mode 100644 Data/Containers/ListUtils.hs diff --git a/Data/Containers/ListUtils.hs b/Data/Containers/ListUtils.hs new file mode 100644 index 000000000..64e667c74 --- /dev/null +++ b/Data/Containers/ListUtils.hs @@ -0,0 +1,65 @@ +{-# LANGUAGE CPP #-} +#if !defined(TESTING) && __GLASGOW_HASKELL__ >= 703 +{-# LANGUAGE Safe #-} +#endif + +----------------------------------------------------------------------------- +-- | +-- Module : Data.Containers.ListUtils +-- Copyright : (c) Gershom Bazerman 2018 +-- License : BSD-style +-- Maintainer : libraries@haskell.org +-- Portability : portable +-- +-- This module provides efficient containers-based functions on the list type. +----------------------------------------------------------------------------- + +module Data.Containers.ListUtils ( + nubOrd, + nubOrdOn, + nubInt, + nubIntOn + ) where + +import qualified Data.Set as Set +import qualified Data.IntSet as IntSet + +-- | /O(n log n)/. The 'nubOrd' function removes duplicate elements from a list. +-- In particular, it keeps only the first occurrence of each element. By using a 'Set' internally +-- it has better asymptotics than the standard 'nub' function. +nubOrd :: (Ord a) => [a] -> [a] +nubOrd l = go Set.empty l + where + go _ [] = [] + go s (x:xs) = if x `Set.member` s then go s xs + else x : go (Set.insert x s) xs + +-- | The `nubOrdOn` function behaves just like `nubOrd` except it preforms comparisons not on the +-- original datatype, but a user-specified projection from that datatype. +nubOrdOn :: (Ord b) => (a -> b) -> [a] -> [a] +nubOrdOn f l = go Set.empty l + where + go _ [] = [] + go s (x:xs) = let fx = f x + in if fx `Set.member` s then go s xs + else x : go (Set.insert fx s) xs + +-- | /O(n min(n,W))/. The 'nubInt' function removes duplicate elements from a list. +-- In particular, it keeps only the first occurrence of each element. By using an 'IntSet' internally +-- it has better asymptotics than the standard 'nub' function. +nubInt :: [Int] -> [Int] +nubInt l = go IntSet.empty l + where + go _ [] = [] + go s (x:xs) = if x `IntSet.member` s then go s xs + else x : go (IntSet.insert x s) xs + +-- | The `nubIntOn` function behaves just like 'nubInt' except it preforms comparisons not on the +-- original datatype, but a user-specified projection from that datatype to 'Int'. +nubIntOn :: (a -> Int) -> [a] -> [a] +nubIntOn f l = go IntSet.empty l + where + go _ [] = [] + go s (x:xs) = let fx = f x + in if fx `IntSet.member` s then go s xs + else x : go (IntSet.insert fx s) xs diff --git a/containers.cabal b/containers.cabal index ec6b6ee5f..6722d948a 100644 --- a/containers.cabal +++ b/containers.cabal @@ -49,6 +49,7 @@ Library other-extensions: CPP, BangPatterns exposed-modules: + Data.Containers.ListUtils Data.IntMap Data.IntMap.Lazy Data.IntMap.Strict From c787d87fdb709644575e48d509fb757b62695894 Mon Sep 17 00:00:00 2001 From: Gershom Date: Fri, 26 Jan 2018 01:25:38 -0500 Subject: [PATCH 2/2] tests, codereview --- Data/Containers/ListUtils.hs | 12 ++++++------ containers.cabal | 19 +++++++++++++++++++ tests/listutils-properties.hs | 27 +++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 6 deletions(-) create mode 100644 tests/listutils-properties.hs diff --git a/Data/Containers/ListUtils.hs b/Data/Containers/ListUtils.hs index 64e667c74..f8885f313 100644 --- a/Data/Containers/ListUtils.hs +++ b/Data/Containers/ListUtils.hs @@ -28,16 +28,16 @@ import qualified Data.IntSet as IntSet -- In particular, it keeps only the first occurrence of each element. By using a 'Set' internally -- it has better asymptotics than the standard 'nub' function. nubOrd :: (Ord a) => [a] -> [a] -nubOrd l = go Set.empty l +nubOrd = go Set.empty where go _ [] = [] go s (x:xs) = if x `Set.member` s then go s xs else x : go (Set.insert x s) xs --- | The `nubOrdOn` function behaves just like `nubOrd` except it preforms comparisons not on the +-- | The `nubOrdOn` function behaves just like `nubOrd` except it performs comparisons not on the -- original datatype, but a user-specified projection from that datatype. nubOrdOn :: (Ord b) => (a -> b) -> [a] -> [a] -nubOrdOn f l = go Set.empty l +nubOrdOn f = go Set.empty where go _ [] = [] go s (x:xs) = let fx = f x @@ -48,16 +48,16 @@ nubOrdOn f l = go Set.empty l -- In particular, it keeps only the first occurrence of each element. By using an 'IntSet' internally -- it has better asymptotics than the standard 'nub' function. nubInt :: [Int] -> [Int] -nubInt l = go IntSet.empty l +nubInt = go IntSet.empty where go _ [] = [] go s (x:xs) = if x `IntSet.member` s then go s xs else x : go (IntSet.insert x s) xs --- | The `nubIntOn` function behaves just like 'nubInt' except it preforms comparisons not on the +-- | The `nubIntOn` function behaves just like 'nubInt' except it performs comparisons not on the -- original datatype, but a user-specified projection from that datatype to 'Int'. nubIntOn :: (a -> Int) -> [a] -> [a] -nubIntOn f l = go IntSet.empty l +nubIntOn f = go IntSet.empty where go _ [] = [] go s (x:xs) = let fx = f x diff --git a/containers.cabal b/containers.cabal index 6722d948a..a4f281544 100644 --- a/containers.cabal +++ b/containers.cabal @@ -619,3 +619,22 @@ test-suite intset-strictness-properties ghc-options: -Wall include-dirs: include + +test-suite listutils-properties + hs-source-dirs: tests, . + main-is: listutils-properties.hs + other-modules: + Data.Containers.ListUtils + type: exitcode-stdio-1.0 + + build-depends: + base >= 4.3 && < 5, + ChasingBottoms, + deepseq >= 1.2 && < 1.5, + QuickCheck >= 2.7.1, + ghc-prim, + test-framework >= 0.3.3, + test-framework-quickcheck2 >= 0.2.9 + + ghc-options: -Wall + include-dirs: include diff --git a/tests/listutils-properties.hs b/tests/listutils-properties.hs new file mode 100644 index 000000000..658c2d54e --- /dev/null +++ b/tests/listutils-properties.hs @@ -0,0 +1,27 @@ +module Main where + +import Data.List (nub, nubBy) +import Data.Containers.ListUtils +import Test.Framework +import Test.Framework.Providers.QuickCheck2 + +main :: IO () +main = defaultMain + [ testProperty "nubOrd" prop_nubOrd + , testProperty "nubOrdOn" prop_nubOrdOn + , testProperty "nubInt" prop_nubInt + , testProperty "nubIntOn" prop_nubIntOn + ] + + +prop_nubOrd :: [Int] -> Bool +prop_nubOrd xs = nubOrd xs == nub xs + +prop_nubInt :: [Int] -> Bool +prop_nubInt xs = nubInt xs == nub xs + +prop_nubOrdOn :: [(Int,Int)] -> Bool +prop_nubOrdOn xs = nubOrdOn snd xs == nubBy (\x y -> snd x == snd y) xs + +prop_nubIntOn :: [(Int,Int)] -> Bool +prop_nubIntOn xs = nubIntOn snd xs == nubBy (\x y -> snd x == snd y) xs