@@ -71,12 +71,10 @@ module System.Hatrace
7171
7272import Conduit (foldlC )
7373import Control.Arrow (second )
74- import Control.Monad (when )
7574import Control.Monad.IO.Class (MonadIO , liftIO )
7675import Control.Monad.IO.Unlift (MonadUnliftIO )
77- import Data.Bits ((.|.) , (.&.) , shiftL , shiftR )
76+ import Data.Bits ((.|.) , shiftL , shiftR )
7877import Data.ByteString (ByteString )
79- import qualified Data.ByteString as BS
8078import qualified Data.ByteString.Internal as BSI
8179import Data.Conduit
8280import qualified Data.Conduit.List as CL
@@ -1050,15 +1048,16 @@ getExePath pid = do
10501048
10511049
10521050data FileWriteEvent
1053- = FileOpen ByteString
1051+ = FileOpen ByteString -- ^ name used to open the file
10541052 | FileWrite
10551053 | FileClose
10561054 | FileRename ByteString -- ^ new (target) name
10571055 deriving (Eq , Ord , Show )
10581056
1057+ -- | Uses raw trace events to produce more focused events aimed at analysing file writes.
1058+ -- Output events are accompanied by corresponding absolute file paths.
1059+ --
10591060-- NOTES:
1060- -- * the code doesn't register `open` syscalls for files opened as readonly,
1061- -- at the same time this filter isn't applied for other syscalls (close, rename)
10621061-- * only calls to `write` are currently used as a marker for writes and syscalls
10631062-- `pwrite`, `writev`, `pwritev` are not taken into account
10641063fileWritesConduit :: (MonadIO m ) => ConduitT (CPid , TraceEvent ) (FilePath , FileWriteEvent ) m ()
@@ -1118,48 +1117,67 @@ data FileWriteBehavior
11181117 | Unexpected String
11191118 deriving (Eq , Ord , Show )
11201119
1120+ -- uses state machine implemented as recursive functions
11211121analyzeWrites :: [FileWriteEvent ] -> FileWriteBehavior
1122- analyzeWrites events = checkOpen events
1122+ analyzeWrites es = checkOpen es
11231123 where
1124- checkOpen [] = NoWrites
1125- -- we could see a close syscall for a file opened in readonly mode
1126- -- thus we just ignore it
1127- checkOpen (FileClose : es) = checkOpen es
1128- checkOpen (FileOpen _: es) = checkWrites es
1129- checkOpen (e: _) = unexpected " FileOpen" e
1130- checkWrites [] = Unexpected $ " FileClose was expected but not seen"
1131- checkWrites (FileClose : es) = checkOpen es
1132- checkWrites (FileWrite : es) = checkWrites' es
1133- checkWrites (e: _) = unexpected " FileClose or FileWrite" e
1134- checkWrites' [] = Unexpected $ " FileClose was expected but not seen"
1135- checkWrites' (FileWrite : es) = checkWrites' es
1136- checkWrites' (FileClose : es) = checkRename es
1137- checkWrites' (e: _) = unexpected " FileClose or FileWrite" e
1138- checkRename (FileRename path: es) =
1139- case checkOpen es of
1140- NoWrites ->
1141- -- we write original path here which swapped
1142- -- with oldpath in `atomicWritesSink`
1143- AtomicWrite (T. unpack $ decodeUtf8OrError path)
1144- other ->
1145- other
1146- checkRename es =
1147- case checkOpen es of
1148- NoWrites -> NonatomicWrite
1149- other -> other
1150- unexpected expected real =
1124+ checkOpen events =
1125+ case events of
1126+ [] -> NoWrites
1127+ -- we could see a `close` syscall for a pipe descriptor
1128+ -- with no `open` for it thus we just ignore it
1129+ FileClose : rest -> checkOpen rest
1130+ FileOpen _ : rest -> checkWrites rest
1131+ unexpected : _ -> unexpectedEvent " FileOpen" unexpected
1132+ checkWrites events =
1133+ case events of
1134+ [] -> Unexpected $ " FileClose was expected but not seen"
1135+ FileClose : rest -> checkOpen rest
1136+ FileWrite : rest -> checkAfterWrite rest
1137+ unexpected : _ -> unexpectedEvent " FileClose or FileWrite" unexpected
1138+ checkAfterWrite events =
1139+ case events of
1140+ [] -> Unexpected $ " FileClose was expected but not seen"
1141+ FileWrite : rest -> checkAfterWrite rest
1142+ FileClose : rest -> checkRename rest
1143+ unexpected : _ -> unexpectedEvent " FileClose or FileWrite" unexpected
1144+ -- when it happens that a path gets more than 1 sequence open-write-close
1145+ -- for it we need to check whether there was a `rename` after the 1st one
1146+ -- and then check the result of the next one and combine them accordingly
1147+ -- e.g. atomic + non-atomic -> non-atomic
1148+ checkRename events =
1149+ case events of
1150+ FileRename path : rest ->
1151+ case checkOpen rest of
1152+ NoWrites ->
1153+ -- we write original path here which swapped
1154+ -- with oldpath in `atomicWritesSink`
1155+ AtomicWrite (T. unpack $ decodeUtf8OrError path)
1156+ other ->
1157+ other
1158+ noRenames ->
1159+ case checkOpen noRenames of
1160+ NoWrites -> NonatomicWrite
1161+ other -> other
1162+ unexpectedEvent expected real =
11511163 Unexpected $ " expected " ++ expected ++ " , but " ++
11521164 show real ++ " was seen"
11531165
11541166atomicWritesSink :: (MonadIO m ) => ConduitT (CPid , TraceEvent ) Void m (Map FilePath FileWriteBehavior )
11551167atomicWritesSink =
1156- extract <$> (fileWritesConduit .| foldlC collectWrite mempty )
1168+ extract <$> (fileWritesConduit .| foldlC collectWrite Map. empty )
11571169 where
1170+ collectWrite :: Map FilePath [FileWriteEvent ] -> (FilePath , FileWriteEvent ) -> Map FilePath [FileWriteEvent ]
11581171 collectWrite m (fp, e) = Map. alter (Just . maybe [e] (e: )) fp m
1172+ extract :: Map FilePath [FileWriteEvent ] -> Map FilePath FileWriteBehavior
11591173 extract m =
11601174 let (noRenames, renames) =
11611175 partitionEithers . map (analyzeWrites' . second reverse ) $ Map. toList m
11621176 in Map. fromList noRenames <> Map. fromList (map (second AtomicWrite ) renames)
1177+ -- this function (in addition to what `analyzeWrites` does) treats atomic writes
1178+ -- in a special way: those include a rename and we need to put atomic writes under
1179+ -- a path which is a target of a corresponding rename
1180+ -- so in the end we swap path in `AtomicWrite` and its corresponding map key
11631181 analyzeWrites' (src, es) = case analyzeWrites es of
11641182 AtomicWrite target -> Right (target, src)
11651183 other -> Left (src, other)
0 commit comments