Skip to content

Commit 7bca32d

Browse files
qrilkanh2
authored andcommitted
Add some haddocks and improve code readability
1 parent eef13c0 commit 7bca32d

File tree

1 file changed

+53
-35
lines changed

1 file changed

+53
-35
lines changed

src/System/Hatrace.hs

Lines changed: 53 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -71,12 +71,10 @@ module System.Hatrace
7171

7272
import Conduit (foldlC)
7373
import Control.Arrow (second)
74-
import Control.Monad (when)
7574
import Control.Monad.IO.Class (MonadIO, liftIO)
7675
import Control.Monad.IO.Unlift (MonadUnliftIO)
77-
import Data.Bits ((.|.), (.&.), shiftL, shiftR)
76+
import Data.Bits ((.|.), shiftL, shiftR)
7877
import Data.ByteString (ByteString)
79-
import qualified Data.ByteString as BS
8078
import qualified Data.ByteString.Internal as BSI
8179
import Data.Conduit
8280
import qualified Data.Conduit.List as CL
@@ -1050,15 +1048,16 @@ getExePath pid = do
10501048

10511049

10521050
data FileWriteEvent
1053-
= FileOpen ByteString
1051+
= FileOpen ByteString -- ^ name used to open the file
10541052
| FileWrite
10551053
| FileClose
10561054
| FileRename ByteString -- ^ new (target) name
10571055
deriving (Eq, Ord, Show)
10581056

1057+
-- | Uses raw trace events to produce more focused events aimed at analysing file writes.
1058+
-- Output events are accompanied by corresponding absolute file paths.
1059+
--
10591060
-- NOTES:
1060-
-- * the code doesn't register `open` syscalls for files opened as readonly,
1061-
-- at the same time this filter isn't applied for other syscalls (close, rename)
10621061
-- * only calls to `write` are currently used as a marker for writes and syscalls
10631062
-- `pwrite`, `writev`, `pwritev` are not taken into account
10641063
fileWritesConduit :: (MonadIO m) => ConduitT (CPid, TraceEvent) (FilePath, FileWriteEvent) m ()
@@ -1118,48 +1117,67 @@ data FileWriteBehavior
11181117
| Unexpected String
11191118
deriving (Eq, Ord, Show)
11201119

1120+
-- uses state machine implemented as recursive functions
11211121
analyzeWrites :: [FileWriteEvent] -> FileWriteBehavior
1122-
analyzeWrites events = checkOpen events
1122+
analyzeWrites es = checkOpen es
11231123
where
1124-
checkOpen [] = NoWrites
1125-
-- we could see a close syscall for a file opened in readonly mode
1126-
-- thus we just ignore it
1127-
checkOpen (FileClose:es) = checkOpen es
1128-
checkOpen (FileOpen _:es) = checkWrites es
1129-
checkOpen (e:_) = unexpected "FileOpen" e
1130-
checkWrites [] = Unexpected $ "FileClose was expected but not seen"
1131-
checkWrites (FileClose:es) = checkOpen es
1132-
checkWrites (FileWrite:es) = checkWrites' es
1133-
checkWrites (e: _) = unexpected "FileClose or FileWrite" e
1134-
checkWrites' [] = Unexpected $ "FileClose was expected but not seen"
1135-
checkWrites' (FileWrite:es) = checkWrites' es
1136-
checkWrites' (FileClose:es) = checkRename es
1137-
checkWrites' (e: _) = unexpected "FileClose or FileWrite" e
1138-
checkRename (FileRename path:es) =
1139-
case checkOpen es of
1140-
NoWrites ->
1141-
-- we write original path here which swapped
1142-
-- with oldpath in `atomicWritesSink`
1143-
AtomicWrite (T.unpack $ decodeUtf8OrError path)
1144-
other ->
1145-
other
1146-
checkRename es =
1147-
case checkOpen es of
1148-
NoWrites -> NonatomicWrite
1149-
other -> other
1150-
unexpected expected real =
1124+
checkOpen events =
1125+
case events of
1126+
[] -> NoWrites
1127+
-- we could see a `close` syscall for a pipe descriptor
1128+
-- with no `open` for it thus we just ignore it
1129+
FileClose : rest -> checkOpen rest
1130+
FileOpen _ : rest -> checkWrites rest
1131+
unexpected : _ -> unexpectedEvent "FileOpen" unexpected
1132+
checkWrites events =
1133+
case events of
1134+
[] -> Unexpected $ "FileClose was expected but not seen"
1135+
FileClose : rest -> checkOpen rest
1136+
FileWrite : rest -> checkAfterWrite rest
1137+
unexpected : _ -> unexpectedEvent "FileClose or FileWrite" unexpected
1138+
checkAfterWrite events =
1139+
case events of
1140+
[] -> Unexpected $ "FileClose was expected but not seen"
1141+
FileWrite : rest -> checkAfterWrite rest
1142+
FileClose : rest -> checkRename rest
1143+
unexpected : _ -> unexpectedEvent "FileClose or FileWrite" unexpected
1144+
-- when it happens that a path gets more than 1 sequence open-write-close
1145+
-- for it we need to check whether there was a `rename` after the 1st one
1146+
-- and then check the result of the next one and combine them accordingly
1147+
-- e.g. atomic + non-atomic -> non-atomic
1148+
checkRename events =
1149+
case events of
1150+
FileRename path : rest ->
1151+
case checkOpen rest of
1152+
NoWrites ->
1153+
-- we write original path here which swapped
1154+
-- with oldpath in `atomicWritesSink`
1155+
AtomicWrite (T.unpack $ decodeUtf8OrError path)
1156+
other ->
1157+
other
1158+
noRenames ->
1159+
case checkOpen noRenames of
1160+
NoWrites -> NonatomicWrite
1161+
other -> other
1162+
unexpectedEvent expected real =
11511163
Unexpected $ "expected " ++ expected ++ ", but " ++
11521164
show real ++ " was seen"
11531165

11541166
atomicWritesSink :: (MonadIO m) => ConduitT (CPid, TraceEvent) Void m (Map FilePath FileWriteBehavior)
11551167
atomicWritesSink =
1156-
extract <$> (fileWritesConduit .| foldlC collectWrite mempty)
1168+
extract <$> (fileWritesConduit .| foldlC collectWrite Map.empty)
11571169
where
1170+
collectWrite :: Map FilePath [FileWriteEvent] -> (FilePath, FileWriteEvent) -> Map FilePath [FileWriteEvent]
11581171
collectWrite m (fp, e) = Map.alter (Just . maybe [e] (e:)) fp m
1172+
extract :: Map FilePath [FileWriteEvent] -> Map FilePath FileWriteBehavior
11591173
extract m =
11601174
let (noRenames, renames) =
11611175
partitionEithers . map (analyzeWrites' . second reverse) $ Map.toList m
11621176
in Map.fromList noRenames <> Map.fromList (map (second AtomicWrite) renames)
1177+
-- this function (in addition to what `analyzeWrites` does) treats atomic writes
1178+
-- in a special way: those include a rename and we need to put atomic writes under
1179+
-- a path which is a target of a corresponding rename
1180+
-- so in the end we swap path in `AtomicWrite` and its corresponding map key
11631181
analyzeWrites' (src, es) = case analyzeWrites es of
11641182
AtomicWrite target -> Right (target, src)
11651183
other -> Left (src, other)

0 commit comments

Comments
 (0)