From a7b1ce2b2320b4c2eca98db06ae2dc4503931490 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 4 Apr 2025 15:39:45 +1300 Subject: [PATCH 01/16] [ML] Report the "actual" memory usage of the autodetect process Determine the actual memory usage of the autodetect process as reported by the OS, e.g. on Linux this would be the value of the maximum resident set size returned by a call to `getrusage`. Add this value to the model size stats record returned to the ES Java process so it can be included in the `job counts` tab for anomaly detection jobs. --- bin/autodetect/Main.cc | 6 +- include/core/CProgramCounters.h | 7 ++- include/model/CResourceMonitor.h | 3 + include/model/ModelTypes.h | 4 +- lib/api/CModelSizeStatsJsonWriter.cc | 4 ++ lib/api/unittest/CAnomalyJobLimitTest.cc | 31 +++++++++- lib/api/unittest/CJsonOutputWriterTest.cc | 59 ++++++++++--------- .../unittest/CModelSnapshotJsonWriterTest.cc | 1 + lib/core/CProcessStats_Linux.cc | 7 ++- lib/core/CProcessStats_MacOSX.cc | 9 ++- lib/core/CProcessStats_Windows.cc | 12 +++- lib/model/CResourceMonitor.cc | 6 ++ lib/model/ModelTypes.cc | 2 + lib/model/unittest/CResourceMonitorTest.cc | 14 ++++- 14 files changed, 126 insertions(+), 39 deletions(-) diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index 904920e3db..bbb90c706a 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -83,7 +84,8 @@ int main(int argc, char** argv) { ml::counter_t::E_TSADNumberMemoryLimitModelCreationFailures, ml::counter_t::E_TSADNumberPrunedItems, ml::counter_t::E_TSADAssignmentMemoryBasis, - ml::counter_t::E_TSADOutputMemoryAllocatorUsage}; + ml::counter_t::E_TSADOutputMemoryAllocatorUsage, + ml::counter_t::E_TSADMaxResidentSetSize}; ml::core::CProgramCounters::registerProgramCounterTypes(counters); @@ -151,6 +153,8 @@ int main(int argc, char** argv) { } cancellerThread.stop(); + LOG_DEBUG(<< "Max Resident Set Size: " << 
ml::core::CProcessStats::maxResidentSetSize()); + // Log the program version immediately after reconfiguring the logger. This // must be done from the program, and NOT a shared library, as each program // statically links its own version library. diff --git a/include/core/CProgramCounters.h b/include/core/CProgramCounters.h index 3c4d10269f..34d5cdbb26 100644 --- a/include/core/CProgramCounters.h +++ b/include/core/CProgramCounters.h @@ -112,6 +112,9 @@ enum ECounterTypes { //! The memory currently used by the allocators to output JSON documents, in bytes. E_TSADOutputMemoryAllocatorUsage = 30, + //! The maximum resident set size of the process, in bytes. + E_TSADMaxResidentSetSize = 31, + // Data Frame Outlier Detection //! The estimated peak memory usage for outlier detection in bytes @@ -146,7 +149,7 @@ enum ECounterTypes { // Add any new values here //! This MUST be last, increment the value for every new enum added - E_LastEnumCounter = 31 + E_LastEnumCounter = 32 }; static constexpr std::size_t NUM_COUNTERS = static_cast(E_LastEnumCounter); @@ -355,6 +358,8 @@ class CORE_EXPORT CProgramCounters { "Which option is being used to get model memory for node assignment?"}, {counter_t::E_TSADOutputMemoryAllocatorUsage, "E_TSADOutputMemoryAllocatorUsage", "The amount of memory used to output JSON documents, in bytes."}, + {counter_t::E_TSADMaxResidentSetSize, "E_TSADMaxResidentSetSize", + "The maximum resident set size of the process, in bytes"}, {counter_t::E_DFOEstimatedPeakMemoryUsage, "E_DFOEstimatedPeakMemoryUsage", "The upfront estimate of the peak memory outlier detection would use"}, {counter_t::E_DFOPeakMemoryUsage, "E_DFOPeakMemoryUsage", "The peak memory outlier detection used"}, diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index 5c7583888b..c9c887281f 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -54,6 +54,7 @@ class MODEL_EXPORT CResourceMonitor { std::size_t 
s_AdjustedUsage{0}; std::size_t s_PeakUsage{0}; std::size_t s_AdjustedPeakUsage{0}; + std::size_t s_ActualMemoryUsage{0}; std::size_t s_ByFields{0}; std::size_t s_PartitionFields{0}; std::size_t s_OverFields{0}; @@ -180,6 +181,8 @@ class MODEL_EXPORT CResourceMonitor { //! Returns the sum of used memory plus any extra memory std::size_t totalMemory() const; + std::size_t actualMemoryUsage() const; + private: using TMonitoredResourcePtrSizeUMap = boost::unordered_map; diff --git a/include/model/ModelTypes.h b/include/model/ModelTypes.h index acbcc14c04..aeffe27e83 100644 --- a/include/model/ModelTypes.h +++ b/include/model/ModelTypes.h @@ -719,7 +719,9 @@ enum EAssignmentMemoryBasis { E_AssignmentBasisUnknown = 0, //!< Decision made in Java code E_AssignmentBasisModelMemoryLimit = 1, //!< Use model memory limit E_AssignmentBasisCurrentModelBytes = 2, //!< Use current actual model size - E_AssignmentBasisPeakModelBytes = 3 //!< Use highest ever actual model size + E_AssignmentBasisPeakModelBytes = 3, //!< Use highest ever actual model size + E_AssignmentBasisActualMemoryUsageBytes = 4 //!< Use the actual memory size + //!< of the process, as reported by the OS }; //! Get a string description of \p assignmentMemoryBasis. 
diff --git a/lib/api/CModelSizeStatsJsonWriter.cc b/lib/api/CModelSizeStatsJsonWriter.cc index 43fef49602..75604c7f6a 100644 --- a/lib/api/CModelSizeStatsJsonWriter.cc +++ b/lib/api/CModelSizeStatsJsonWriter.cc @@ -25,6 +25,7 @@ const std::string JOB_ID{"job_id"}; const std::string MODEL_SIZE_STATS{"model_size_stats"}; const std::string MODEL_BYTES{"model_bytes"}; const std::string PEAK_MODEL_BYTES{"peak_model_bytes"}; +const std::string ACTUAL_MEMORY_USAGE_BYTES{"actual_memory_usage_bytes"}; const std::string MODEL_BYTES_EXCEEDED{"model_bytes_exceeded"}; const std::string MODEL_BYTES_MEMORY_LIMIT{"model_bytes_memory_limit"}; const std::string TOTAL_BY_FIELD_COUNT{"total_by_field_count"}; @@ -60,6 +61,9 @@ void CModelSizeStatsJsonWriter::write(const std::string& jobId, writer.onKey(PEAK_MODEL_BYTES); writer.onUint64(results.s_AdjustedPeakUsage); + writer.onKey(ACTUAL_MEMORY_USAGE_BYTES); + writer.onUint64(results.s_ActualMemoryUsage); + writer.onKey(MODEL_BYTES_EXCEEDED); writer.onUint64(results.s_BytesExceeded); diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index b003e90a53..938892589c 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -9,6 +9,7 @@ * limitation. 
*/ #include +#include #include #include @@ -92,6 +93,10 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { std::size_t nonLimitedUsage{0}; std::size_t limitedUsage{0}; + std::size_t actualUsage{0}; + std::size_t baseline{0}; + std::size_t nonLimitedAdjustedActualUsage{0}; + std::size_t limitedAdjustedActualUsage{0}; { // Without limits, this data set should make the models around // 1230000 bytes @@ -105,6 +110,8 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); model::CLimits limits; + baseline = limits.resourceMonitor().actualMemoryUsage(); + //limits.resourceMonitor().m_ByteLimitHigh = 100000; //limits.resourceMonitor().m_ByteLimitLow = 90000; @@ -127,8 +134,15 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { BOOST_REQUIRE_EQUAL(uint64_t(18630), job.numRecordsHandled()); nonLimitedUsage = limits.resourceMonitor().totalMemory(); + actualUsage = limits.resourceMonitor().actualMemoryUsage(); + nonLimitedAdjustedActualUsage = actualUsage - baseline; } } + LOG_DEBUG(<< "nonLimitedUsage: " << nonLimitedUsage); + LOG_DEBUG(<< "baseline: " << baseline); + LOG_DEBUG(<< "actualUsage: " << actualUsage); + LOG_DEBUG(<< "nonLimitedAdjustedActualUsage: " << nonLimitedAdjustedActualUsage); + BOOST_TEST_REQUIRE(nonLimitedAdjustedActualUsage >= nonLimitedUsage); { // Now run the data with limiting ml::api::CAnomalyJobConfig jobConfig = CTestAnomalyJob::makeSimpleJobConfig( @@ -138,6 +152,8 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { model::CAnomalyDetectorModelConfig::defaultConfig(3600); model::CLimits limits; + baseline = limits.resourceMonitor().actualMemoryUsage(); + std::stringstream outputStrm; { core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); @@ -166,11 +182,18 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { // TODO this limit must be tightened once there is more granular // control over the model memory creation limitedUsage = limits.resourceMonitor().totalMemory(); + actualUsage = limits.resourceMonitor().actualMemoryUsage(); + 
limitedAdjustedActualUsage = actualUsage - baseline; } LOG_TRACE(<< outputStrm.str()); LOG_DEBUG(<< "Non-limited usage: " << nonLimitedUsage << "; limited: " << limitedUsage); + LOG_DEBUG(<< "baseline: " << baseline); + LOG_DEBUG(<< "actualUsage: " << actualUsage); + LOG_DEBUG(<< "limitedAdjustedActualUsage: " << limitedAdjustedActualUsage); BOOST_TEST_REQUIRE(limitedUsage < nonLimitedUsage); + BOOST_TEST_REQUIRE(limitedAdjustedActualUsage < nonLimitedAdjustedActualUsage); + BOOST_TEST_REQUIRE(limitedAdjustedActualUsage >= limitedUsage); } } @@ -375,6 +398,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage bytes = " << used.s_Usage); + LOG_DEBUG(<< "Actual memory usage bytes = " << used.s_ActualMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * core::constants::BYTES_IN_MEGABYTES); BOOST_TEST_REQUIRE(used.s_ByFields > testParam.s_ExpectedByFields); @@ -384,6 +408,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedByMemoryUsageRelativeErrorDivisor); + BOOST_TEST_REQUIRE(used.s_Usage <= used.s_ActualMemoryUsage); } LOG_DEBUG(<< "**** Test partition with bucketLength = " << testParam.s_BucketLength @@ -423,11 +448,12 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { } core_t::TTime startOfBucket{ maths::common::CIntegerTools::floor(time, testParam.s_BucketLength)}; - auto used = limits.resourceMonitor().createMemoryUsageReport(startOfBucket); + auto used = limits.resourceMonitor(). 
createMemoryUsageReport(startOfBucket); LOG_DEBUG(<< "# by = " << used.s_ByFields); LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); + LOG_DEBUG(<< "Actual memory usage = " << used.s_ActualMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * 1024 * 1024); BOOST_TEST_REQUIRE(used.s_PartitionFields >= testParam.s_ExpectedPartitionFields); BOOST_TEST_REQUIRE(used.s_PartitionFields < 450); @@ -437,6 +463,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedPartitionUsageRelativeErrorDivisor); + BOOST_TEST_REQUIRE(used.s_Usage <= used.s_ActualMemoryUsage); } LOG_DEBUG(<< "**** Test over with bucketLength = " << testParam.s_BucketLength @@ -479,6 +506,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# over = " << used.s_OverFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); + LOG_DEBUG(<< "Actual memory usage = " << used.s_ActualMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * 1024 * 1024); BOOST_TEST_REQUIRE(used.s_OverFields > testParam.s_ExpectedOverFields); BOOST_TEST_REQUIRE(used.s_OverFields <= 9000); @@ -486,6 +514,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedOverUsageRelativeErrorDivisor); + BOOST_TEST_REQUIRE(used.s_Usage <= used.s_ActualMemoryUsage); } } } diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index ba44163e7c..b4b9db851a 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1728,21 
+1728,22 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { resourceUsage.s_AdjustedUsage = 2; resourceUsage.s_PeakUsage = 3; resourceUsage.s_AdjustedPeakUsage = 4; - resourceUsage.s_ByFields = 5; - resourceUsage.s_PartitionFields = 6; - resourceUsage.s_OverFields = 7; - resourceUsage.s_AllocationFailures = 8; + resourceUsage.s_ActualMemoryUsage = 5; + resourceUsage.s_ByFields = 6; + resourceUsage.s_PartitionFields = 7; + resourceUsage.s_OverFields = 8; + resourceUsage.s_AllocationFailures = 9; resourceUsage.s_MemoryStatus = ml::model_t::E_MemoryStatusHardLimit; - resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisCurrentModelBytes; - resourceUsage.s_BucketStartTime = 9; - resourceUsage.s_BytesExceeded = 10; - resourceUsage.s_BytesMemoryLimit = 11; - resourceUsage.s_OverallCategorizerStats.s_CategorizedMessages = 12; - resourceUsage.s_OverallCategorizerStats.s_TotalCategories = 13; - resourceUsage.s_OverallCategorizerStats.s_FrequentCategories = 14; - resourceUsage.s_OverallCategorizerStats.s_RareCategories = 15; - resourceUsage.s_OverallCategorizerStats.s_DeadCategories = 16; - resourceUsage.s_OverallCategorizerStats.s_MemoryCategorizationFailures = 17; + resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisActualMemoryUsageBytes; + resourceUsage.s_BucketStartTime = 10; + resourceUsage.s_BytesExceeded = 11; + resourceUsage.s_BytesMemoryLimit = 12; + resourceUsage.s_OverallCategorizerStats.s_CategorizedMessages = 13; + resourceUsage.s_OverallCategorizerStats.s_TotalCategories = 14; + resourceUsage.s_OverallCategorizerStats.s_FrequentCategories = 15; + resourceUsage.s_OverallCategorizerStats.s_RareCategories = 16; + resourceUsage.s_OverallCategorizerStats.s_DeadCategories = 17; + resourceUsage.s_OverallCategorizerStats.s_MemoryCategorizationFailures = 18; resourceUsage.s_OverallCategorizerStats.s_CategorizationStatus = ml::model_t::E_CategorizationStatusWarn; @@ -1770,44 +1771,46 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { 
BOOST_REQUIRE_EQUAL(2, sizeStats.at("model_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL(4, sizeStats.at("peak_model_bytes").to_number()); + BOOST_TEST_REQUIRE(sizeStats.contains("actual_memory_usage_bytes")); + BOOST_REQUIRE_EQUAL(5, sizeStats.at("actual_memory_usage_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_by_field_count")); - BOOST_REQUIRE_EQUAL(5, sizeStats.at("total_by_field_count").to_number()); + BOOST_REQUIRE_EQUAL(6, sizeStats.at("total_by_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_partition_field_count")); BOOST_REQUIRE_EQUAL( - 6, sizeStats.at("total_partition_field_count").to_number()); + 7, sizeStats.at("total_partition_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_over_field_count")); - BOOST_REQUIRE_EQUAL(7, sizeStats.at("total_over_field_count").to_number()); + BOOST_REQUIRE_EQUAL(8, sizeStats.at("total_over_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("bucket_allocation_failures_count")); BOOST_REQUIRE_EQUAL( - 8, sizeStats.at("bucket_allocation_failures_count").to_number()); + 9, sizeStats.at("bucket_allocation_failures_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("timestamp")); - BOOST_REQUIRE_EQUAL(9000, sizeStats.at("timestamp").to_number()); + BOOST_REQUIRE_EQUAL(10000, sizeStats.at("timestamp").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("memory_status")); BOOST_REQUIRE_EQUAL("hard_limit", sizeStats.at("memory_status").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("assignment_memory_basis")); - BOOST_REQUIRE_EQUAL("current_model_bytes", + BOOST_REQUIRE_EQUAL("actual_memory_usage_bytes", sizeStats.at("assignment_memory_basis").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("log_time")); std::int64_t nowMs{ml::core::CTimeUtils::nowMs()}; BOOST_TEST_REQUIRE(nowMs >= sizeStats.at("log_time").to_number()); 
BOOST_TEST_REQUIRE(sizeStats.contains("model_bytes_exceeded")); - BOOST_REQUIRE_EQUAL(10, sizeStats.at("model_bytes_exceeded").to_number()); + BOOST_REQUIRE_EQUAL(11, sizeStats.at("model_bytes_exceeded").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("model_bytes_memory_limit")); BOOST_REQUIRE_EQUAL( - 11, sizeStats.at("model_bytes_memory_limit").to_number()); + 12, sizeStats.at("model_bytes_memory_limit").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("categorized_doc_count")); - BOOST_REQUIRE_EQUAL(12, sizeStats.at("categorized_doc_count").to_number()); + BOOST_REQUIRE_EQUAL(13, sizeStats.at("categorized_doc_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_category_count")); - BOOST_REQUIRE_EQUAL(13, sizeStats.at("total_category_count").to_number()); + BOOST_REQUIRE_EQUAL(14, sizeStats.at("total_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("frequent_category_count")); BOOST_REQUIRE_EQUAL( - 14, sizeStats.at("frequent_category_count").to_number()); + 15, sizeStats.at("frequent_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("rare_category_count")); - BOOST_REQUIRE_EQUAL(15, sizeStats.at("rare_category_count").to_number()); + BOOST_REQUIRE_EQUAL(16, sizeStats.at("rare_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("dead_category_count")); - BOOST_REQUIRE_EQUAL(16, sizeStats.at("dead_category_count").to_number()); + BOOST_REQUIRE_EQUAL(17, sizeStats.at("dead_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("failed_category_count")); - BOOST_REQUIRE_EQUAL(17, sizeStats.at("failed_category_count").to_number()); + BOOST_REQUIRE_EQUAL(18, sizeStats.at("failed_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("categorization_status")); BOOST_REQUIRE_EQUAL("warn", sizeStats.at("categorization_status").as_string()); } diff --git a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc index 
ff404b7f91..eb4d382b9a 100644 --- a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc +++ b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc @@ -36,6 +36,7 @@ BOOST_AUTO_TEST_CASE(testWrite) { 20000, // bytes used (adjusted) 30000, // peak bytes used 60000, // peak bytes used (adjusted) + 409600, // Actual memory used (max rss) 3, // # by fields 1, // # partition fields 150, // # over fields diff --git a/lib/core/CProcessStats_Linux.cc b/lib/core/CProcessStats_Linux.cc index e5ab8cdfd0..b0c02425e9 100644 --- a/lib/core/CProcessStats_Linux.cc +++ b/lib/core/CProcessStats_Linux.cc @@ -11,6 +11,7 @@ #include #include +#include #include #include @@ -87,7 +88,11 @@ std::size_t CProcessStats::maxResidentSetSize() { } // ru_maxrss is in kilobytes - return static_cast(rusage.ru_maxrss * 1024L); + std::size_t maxRSS = static_cast(rusage.ru_maxrss * 1024L); + + CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = maxRSS; + + return maxRSS; } } } diff --git a/lib/core/CProcessStats_MacOSX.cc b/lib/core/CProcessStats_MacOSX.cc index 9aa1e969c9..f1c55e2aae 100644 --- a/lib/core/CProcessStats_MacOSX.cc +++ b/lib/core/CProcessStats_MacOSX.cc @@ -8,9 +8,11 @@ * compliance with the Elastic License 2.0 and the foregoing additional * limitation. 
*/ -#include #include +#include +#include + #include #include #include @@ -31,9 +33,10 @@ std::size_t CProcessStats::maxResidentSetSize() { LOG_DEBUG(<< "failed to get resource usage(getrusage): " << ::strerror(errno)); return 0; } - + std::size_t maxRSS = static_cast(rusage.ru_maxrss); + CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = maxRSS; // ru_maxrss is in bytes - return static_cast(rusage.ru_maxrss); + return maxRSS; } } } diff --git a/lib/core/CProcessStats_Windows.cc b/lib/core/CProcessStats_Windows.cc index 7ca2d7e6c0..78cb418d93 100644 --- a/lib/core/CProcessStats_Windows.cc +++ b/lib/core/CProcessStats_Windows.cc @@ -8,8 +8,10 @@ * compliance with the Elastic License 2.0 and the foregoing additional * limitation. */ -#include #include + +#include +#include #include #include @@ -36,7 +38,13 @@ std::size_t CProcessStats::maxResidentSetSize() { LOG_DEBUG(<< "Failed to retrieve memory info " << CWindowsError()); return 0; } - return static_cast(stats.PeakWorkingSetSize); + + std::size_t peakWorkingSetSize = static_cast(stats.PeakWorkingSetSize); + + + CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = peakWorkingSetSize; + + return peakWorkingSetSize; } } } diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index d93b3b8bd8..a0b74ed6f3 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -382,6 +383,7 @@ CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { res.s_PeakUsage = static_cast( core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage)); res.s_AdjustedPeakUsage = this->adjustedUsage(res.s_PeakUsage); + res.s_ActualMemoryUsage = core::CProcessStats::maxResidentSetSize(); res.s_BytesMemoryLimit = this->persistenceMemoryIncreaseFactor() * m_ByteLimitHigh; res.s_BytesExceeded = m_CurrentBytesExceeded; res.s_MemoryStatus = m_MemoryStatus; @@ -491,5 +493,9 @@ std::size_t 
CResourceMonitor::totalMemory() const { counter_t::E_TSADOutputMemoryAllocatorUsage)); } +std::size_t CResourceMonitor::actualMemoryUsage() const { + return core::CProcessStats::maxResidentSetSize(); +} + } // model } // ml diff --git a/lib/model/ModelTypes.cc b/lib/model/ModelTypes.cc index 2fab1d1c2a..a9140a66b3 100644 --- a/lib/model/ModelTypes.cc +++ b/lib/model/ModelTypes.cc @@ -1733,6 +1733,8 @@ std::string print(EAssignmentMemoryBasis assignmentMemoryBasis) { return "current_model_bytes"; case E_AssignmentBasisPeakModelBytes: return "peak_model_bytes"; + case E_AssignmentBasisActualMemoryUsageBytes: + return "actual_memory_usage_bytes"; } return "-"; } diff --git a/lib/model/unittest/CResourceMonitorTest.cc b/lib/model/unittest/CResourceMonitorTest.cc index f69dccc384..f8fe8fcc89 100644 --- a/lib/model/unittest/CResourceMonitorTest.cc +++ b/lib/model/unittest/CResourceMonitorTest.cc @@ -536,7 +536,7 @@ BOOST_FIXTURE_TEST_CASE(testExtraMemory, CTestFixture) { } BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { - // Clear the counter so that other test cases do not interfere. + // Clear the counters so that other test cases do not interfere. 
core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage) = 0; CLimits limits; @@ -549,6 +549,9 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory, m_ReportedModelSizeStats.s_PeakUsage); + BOOST_TEST_REQUIRE(baseTotalMemory <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + monitor.addExtraMemory(100); monitor.updateMoments(monitor.totalMemory(), 0, 1); @@ -556,6 +559,9 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); + BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + monitor.addExtraMemory(-50); monitor.updateMoments(monitor.totalMemory(), 0, 1); @@ -563,12 +569,18 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 50, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); + BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + monitor.addExtraMemory(100); monitor.updateMoments(monitor.totalMemory(), 0, 1); monitor.sendMemoryUsageReport(0, 1); BOOST_REQUIRE_EQUAL(baseTotalMemory + 150, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 150, m_ReportedModelSizeStats.s_PeakUsage); + + BOOST_TEST_REQUIRE(baseTotalMemory + 150 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= 
m_ReportedModelSizeStats.s_ActualMemoryUsage); } BOOST_FIXTURE_TEST_CASE(testUpdateMoments, CTestFixture) { From cb957cacfaa15484196a48c42467a8a5ab7c8040 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 4 Apr 2025 15:53:50 +1300 Subject: [PATCH 02/16] Update changelog --- docs/CHANGELOG.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 75117ca066..0b118eb82b 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -33,6 +33,7 @@ === Enhancements * Track memory used in the hierarchical results normalizer. (See {ml-pull}2831[#2831].) +* Report the actual memory usage of the autodetect process. (See {ml-pull}2846[#2846]) === Bug Fixes From 8f73f02d7486068b1d43f4a337b0593fcdd1ef12 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 4 Apr 2025 16:28:53 +1300 Subject: [PATCH 03/16] Formatting --- include/model/ModelTypes.h | 2 +- lib/api/unittest/CAnomalyJobLimitTest.cc | 2 +- lib/api/unittest/CJsonOutputWriterTest.cc | 3 ++- lib/core/CProcessStats_Windows.cc | 1 - lib/model/unittest/CResourceMonitorTest.cc | 12 ++++++++---- 5 files changed, 12 insertions(+), 8 deletions(-) diff --git a/include/model/ModelTypes.h b/include/model/ModelTypes.h index aeffe27e83..ef09536551 100644 --- a/include/model/ModelTypes.h +++ b/include/model/ModelTypes.h @@ -721,7 +721,7 @@ enum EAssignmentMemoryBasis { E_AssignmentBasisCurrentModelBytes = 2, //!< Use current actual model size E_AssignmentBasisPeakModelBytes = 3, //!< Use highest ever actual model size E_AssignmentBasisActualMemoryUsageBytes = 4 //!< Use the actual memory size - //!< of the process, as reported by the OS + //!< of the process, as reported by the OS }; //! Get a string description of \p assignmentMemoryBasis. 
diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index 938892589c..f1531f46ec 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -448,7 +448,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { } core_t::TTime startOfBucket{ maths::common::CIntegerTools::floor(time, testParam.s_BucketLength)}; - auto used = limits.resourceMonitor(). createMemoryUsageReport(startOfBucket); + auto used = limits.resourceMonitor().createMemoryUsageReport(startOfBucket); LOG_DEBUG(<< "# by = " << used.s_ByFields); LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index b4b9db851a..95c5e319b8 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1772,7 +1772,8 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { BOOST_TEST_REQUIRE(sizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL(4, sizeStats.at("peak_model_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("actual_memory_usage_bytes")); - BOOST_REQUIRE_EQUAL(5, sizeStats.at("actual_memory_usage_bytes").to_number()); + BOOST_REQUIRE_EQUAL( + 5, sizeStats.at("actual_memory_usage_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_by_field_count")); BOOST_REQUIRE_EQUAL(6, sizeStats.at("total_by_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_partition_field_count")); diff --git a/lib/core/CProcessStats_Windows.cc b/lib/core/CProcessStats_Windows.cc index 78cb418d93..d6e6e4649c 100644 --- a/lib/core/CProcessStats_Windows.cc +++ b/lib/core/CProcessStats_Windows.cc @@ -41,7 +41,6 @@ std::size_t CProcessStats::maxResidentSetSize() { std::size_t peakWorkingSetSize = static_cast(stats.PeakWorkingSetSize); - 
CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = peakWorkingSetSize; return peakWorkingSetSize; diff --git a/lib/model/unittest/CResourceMonitorTest.cc b/lib/model/unittest/CResourceMonitorTest.cc index f8fe8fcc89..193746addc 100644 --- a/lib/model/unittest/CResourceMonitorTest.cc +++ b/lib/model/unittest/CResourceMonitorTest.cc @@ -550,7 +550,8 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory, m_ReportedModelSizeStats.s_PeakUsage); BOOST_TEST_REQUIRE(baseTotalMemory <= m_ReportedModelSizeStats.s_ActualMemoryUsage); - BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= + m_ReportedModelSizeStats.s_ActualMemoryUsage); monitor.addExtraMemory(100); @@ -560,7 +561,8 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); - BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= + m_ReportedModelSizeStats.s_ActualMemoryUsage); monitor.addExtraMemory(-50); @@ -570,7 +572,8 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); - BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= + m_ReportedModelSizeStats.s_ActualMemoryUsage); monitor.addExtraMemory(100); @@ -580,7 +583,8 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 150, m_ReportedModelSizeStats.s_PeakUsage); 
BOOST_TEST_REQUIRE(baseTotalMemory + 150 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); - BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= + m_ReportedModelSizeStats.s_ActualMemoryUsage); } BOOST_FIXTURE_TEST_CASE(testUpdateMoments, CTestFixture) { From d3a39aedc57c7de332d81e3ce8797df5b142e409 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 7 Apr 2025 15:43:50 +1200 Subject: [PATCH 04/16] Appease SonarQube --- lib/core/CProcessStats_Linux.cc | 2 +- lib/core/CProcessStats_MacOSX.cc | 2 +- lib/core/CProcessStats_Windows.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/core/CProcessStats_Linux.cc b/lib/core/CProcessStats_Linux.cc index b0c02425e9..c858b4e589 100644 --- a/lib/core/CProcessStats_Linux.cc +++ b/lib/core/CProcessStats_Linux.cc @@ -88,7 +88,7 @@ std::size_t CProcessStats::maxResidentSetSize() { } // ru_maxrss is in kilobytes - std::size_t maxRSS = static_cast(rusage.ru_maxrss * 1024L); + auto maxRSS = static_cast(rusage.ru_maxrss * 1024L); CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = maxRSS; diff --git a/lib/core/CProcessStats_MacOSX.cc b/lib/core/CProcessStats_MacOSX.cc index f1c55e2aae..c3edc85424 100644 --- a/lib/core/CProcessStats_MacOSX.cc +++ b/lib/core/CProcessStats_MacOSX.cc @@ -33,7 +33,7 @@ std::size_t CProcessStats::maxResidentSetSize() { LOG_DEBUG(<< "failed to get resource usage(getrusage): " << ::strerror(errno)); return 0; } - std::size_t maxRSS = static_cast(rusage.ru_maxrss); + auto maxRSS = static_cast(rusage.ru_maxrss); CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = maxRSS; // ru_maxrss is in bytes return maxRSS; diff --git a/lib/core/CProcessStats_Windows.cc b/lib/core/CProcessStats_Windows.cc index d6e6e4649c..d91db5e3c9 100644 --- a/lib/core/CProcessStats_Windows.cc +++ b/lib/core/CProcessStats_Windows.cc @@ -39,7 +39,7 @@ std::size_t 
CProcessStats::maxResidentSetSize() { return 0; } - std::size_t peakWorkingSetSize = static_cast(stats.PeakWorkingSetSize); + auto peakWorkingSetSize = static_cast(stats.PeakWorkingSetSize); CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = peakWorkingSetSize; From e2d1bf5f0e2a04bd772fa77e7ea96a8582cf6f00 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 7 Apr 2025 15:46:27 +1200 Subject: [PATCH 05/16] Tweak unit test for platform portability --- lib/api/unittest/CAnomalyJobLimitTest.cc | 32 +++++++----------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index f1531f46ec..348b4162db 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -93,10 +93,8 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { std::size_t nonLimitedUsage{0}; std::size_t limitedUsage{0}; - std::size_t actualUsage{0}; - std::size_t baseline{0}; - std::size_t nonLimitedAdjustedActualUsage{0}; - std::size_t limitedAdjustedActualUsage{0}; + std::size_t nonLimitedActualUsage{0}; + std::size_t limitedActualUsage{0}; { // Without limits, this data set should make the models around // 1230000 bytes @@ -110,10 +108,6 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); model::CLimits limits; - baseline = limits.resourceMonitor().actualMemoryUsage(); - - //limits.resourceMonitor().m_ByteLimitHigh = 100000; - //limits.resourceMonitor().m_ByteLimitLow = 90000; { LOG_TRACE(<< "Setting up job"); @@ -134,15 +128,12 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { BOOST_REQUIRE_EQUAL(uint64_t(18630), job.numRecordsHandled()); nonLimitedUsage = limits.resourceMonitor().totalMemory(); - actualUsage = limits.resourceMonitor().actualMemoryUsage(); - nonLimitedAdjustedActualUsage = actualUsage - baseline; + nonLimitedActualUsage = limits.resourceMonitor().actualMemoryUsage(); } } LOG_DEBUG(<< "nonLimitedUsage: " 
<< nonLimitedUsage); - LOG_DEBUG(<< "baseline: " << baseline); - LOG_DEBUG(<< "actualUsage: " << actualUsage); - LOG_DEBUG(<< "nonLimitedAdjustedActualUsage: " << nonLimitedAdjustedActualUsage); - BOOST_TEST_REQUIRE(nonLimitedAdjustedActualUsage >= nonLimitedUsage); + LOG_DEBUG(<< "nonLimitedActualUsage: " << nonLimitedActualUsage); + BOOST_TEST_REQUIRE(nonLimitedActualUsage >= nonLimitedUsage); { // Now run the data with limiting ml::api::CAnomalyJobConfig jobConfig = CTestAnomalyJob::makeSimpleJobConfig( @@ -152,8 +143,6 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { model::CAnomalyDetectorModelConfig::defaultConfig(3600); model::CLimits limits; - baseline = limits.resourceMonitor().actualMemoryUsage(); - std::stringstream outputStrm; { core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); @@ -182,18 +171,15 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { // TODO this limit must be tightened once there is more granular // control over the model memory creation limitedUsage = limits.resourceMonitor().totalMemory(); - actualUsage = limits.resourceMonitor().actualMemoryUsage(); - limitedAdjustedActualUsage = actualUsage - baseline; + limitedActualUsage = limits.resourceMonitor().actualMemoryUsage(); } LOG_TRACE(<< outputStrm.str()); LOG_DEBUG(<< "Non-limited usage: " << nonLimitedUsage << "; limited: " << limitedUsage); - LOG_DEBUG(<< "baseline: " << baseline); - LOG_DEBUG(<< "actualUsage: " << actualUsage); - LOG_DEBUG(<< "limitedAdjustedActualUsage: " << limitedAdjustedActualUsage); + LOG_DEBUG(<< "Non-limited Actual Usage: " << nonLimitedActualUsage); + LOG_DEBUG(<< "Limited Actual Usage: " << limitedActualUsage); BOOST_TEST_REQUIRE(limitedUsage < nonLimitedUsage); - BOOST_TEST_REQUIRE(limitedAdjustedActualUsage < nonLimitedAdjustedActualUsage); - BOOST_TEST_REQUIRE(limitedAdjustedActualUsage >= limitedUsage); + BOOST_TEST_REQUIRE(limitedActualUsage >= limitedUsage); } } From 1a9a99ae5e69956976caea9d0963c70658efb25d Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Wed, 
9 Apr 2025 16:01:06 +1200 Subject: [PATCH 06/16] Attend to review comments * ActualMemory -> SystemMemory * Report current resident set size as well as max --- bin/autodetect/Main.cc | 1 + include/core/CProgramCounters.h | 9 ++- include/model/CResourceMonitor.h | 9 ++- include/model/ModelTypes.h | 4 +- lib/api/CModelSizeStatsJsonWriter.cc | 10 ++- lib/api/unittest/CAnomalyJobLimitTest.cc | 33 +++++----- lib/api/unittest/CJsonOutputWriterTest.cc | 66 ++++++++++--------- .../unittest/CModelSnapshotJsonWriterTest.cc | 7 +- lib/core/CProcessStats_Linux.cc | 2 + lib/core/CProcessStats_MacOSX.cc | 1 + lib/core/CProcessStats_Windows.cc | 6 +- lib/model/CResourceMonitor.cc | 10 ++- lib/model/ModelTypes.cc | 6 +- lib/model/unittest/CResourceMonitorTest.cc | 16 ++--- 14 files changed, 110 insertions(+), 70 deletions(-) diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index bbb90c706a..11f7674634 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -85,6 +85,7 @@ int main(int argc, char** argv) { ml::counter_t::E_TSADNumberPrunedItems, ml::counter_t::E_TSADAssignmentMemoryBasis, ml::counter_t::E_TSADOutputMemoryAllocatorUsage, + ml::counter_t::E_TSADResidentSetSize, ml::counter_t::E_TSADMaxResidentSetSize}; ml::core::CProgramCounters::registerProgramCounterTypes(counters); diff --git a/include/core/CProgramCounters.h b/include/core/CProgramCounters.h index 34d5cdbb26..9bad82389a 100644 --- a/include/core/CProgramCounters.h +++ b/include/core/CProgramCounters.h @@ -112,8 +112,11 @@ enum ECounterTypes { //! The memory currently used by the allocators to output JSON documents, in bytes. E_TSADOutputMemoryAllocatorUsage = 30, + //! The resident set size of the process, in bytes. + E_TSADResidentSetSize = 31, + //! The maximum resident set size of the process, in bytes. - E_TSADMaxResidentSetSize = 31, + E_TSADMaxResidentSetSize = 32, // Data Frame Outlier Detection @@ -149,7 +152,7 @@ enum ECounterTypes { // Add any new values here //! 
This MUST be last, increment the value for every new enum added - E_LastEnumCounter = 32 + E_LastEnumCounter = 33 }; static constexpr std::size_t NUM_COUNTERS = static_cast(E_LastEnumCounter); @@ -358,6 +361,8 @@ class CORE_EXPORT CProgramCounters { "Which option is being used to get model memory for node assignment?"}, {counter_t::E_TSADOutputMemoryAllocatorUsage, "E_TSADOutputMemoryAllocatorUsage", "The amount of memory used to output JSON documents, in bytes."}, + {counter_t::E_TSADResidentSetSize, "E_TSADResidentSetSize", + "The resident set size of the process, in bytes"}, {counter_t::E_TSADMaxResidentSetSize, "E_TSADMaxResidentSetSize", "The maximum resident set size of the process, in bytes"}, {counter_t::E_DFOEstimatedPeakMemoryUsage, "E_DFOEstimatedPeakMemoryUsage", diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index c9c887281f..8cb9d5f3ac 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -54,7 +54,8 @@ class MODEL_EXPORT CResourceMonitor { std::size_t s_AdjustedUsage{0}; std::size_t s_PeakUsage{0}; std::size_t s_AdjustedPeakUsage{0}; - std::size_t s_ActualMemoryUsage{0}; + std::size_t s_SystemMemoryUsage{0}; + std::size_t s_MaxSystemMemoryUsage{0}; std::size_t s_ByFields{0}; std::size_t s_PartitionFields{0}; std::size_t s_OverFields{0}; @@ -181,7 +182,11 @@ class MODEL_EXPORT CResourceMonitor { //! Returns the sum of used memory plus any extra memory std::size_t totalMemory() const; - std::size_t actualMemoryUsage() const; + //! Returns the current physical memory of the process as reported by the system + std::size_t systemMemory() const; + + //! 
Returns the maximum physical memory of the processs as reported by the system + std::size_t maxSystemMemory() const; private: using TMonitoredResourcePtrSizeUMap = diff --git a/include/model/ModelTypes.h b/include/model/ModelTypes.h index ef09536551..66674ed39d 100644 --- a/include/model/ModelTypes.h +++ b/include/model/ModelTypes.h @@ -720,8 +720,8 @@ enum EAssignmentMemoryBasis { E_AssignmentBasisModelMemoryLimit = 1, //!< Use model memory limit E_AssignmentBasisCurrentModelBytes = 2, //!< Use current actual model size E_AssignmentBasisPeakModelBytes = 3, //!< Use highest ever actual model size - E_AssignmentBasisActualMemoryUsageBytes = 4 //!< Use the actual memory size - //!< of the process, as reported by the OS + E_AssignmentBasisSystemMemoryBytes = 4, //!< Use the current system memory size + E_AssignmentBasisMaxSystemMemoryBytes = 5 //!< Use the highest ever system memory size }; //! Get a string description of \p assignmentMemoryBasis. diff --git a/lib/api/CModelSizeStatsJsonWriter.cc b/lib/api/CModelSizeStatsJsonWriter.cc index 75604c7f6a..12858914af 100644 --- a/lib/api/CModelSizeStatsJsonWriter.cc +++ b/lib/api/CModelSizeStatsJsonWriter.cc @@ -25,7 +25,8 @@ const std::string JOB_ID{"job_id"}; const std::string MODEL_SIZE_STATS{"model_size_stats"}; const std::string MODEL_BYTES{"model_bytes"}; const std::string PEAK_MODEL_BYTES{"peak_model_bytes"}; -const std::string ACTUAL_MEMORY_USAGE_BYTES{"actual_memory_usage_bytes"}; +const std::string SYSTEM_MEMORY_BYTES{"system_memory_bytes"}; +const std::string MAX_SYSTEM_MEMORY_BYTES{"max_system_memory_bytes"}; const std::string MODEL_BYTES_EXCEEDED{"model_bytes_exceeded"}; const std::string MODEL_BYTES_MEMORY_LIMIT{"model_bytes_memory_limit"}; const std::string TOTAL_BY_FIELD_COUNT{"total_by_field_count"}; @@ -61,8 +62,11 @@ void CModelSizeStatsJsonWriter::write(const std::string& jobId, writer.onKey(PEAK_MODEL_BYTES); writer.onUint64(results.s_AdjustedPeakUsage); - writer.onKey(ACTUAL_MEMORY_USAGE_BYTES); - 
writer.onUint64(results.s_ActualMemoryUsage); + writer.onKey(SYSTEM_MEMORY_BYTES); + writer.onUint64(results.s_SystemMemoryUsage); + + writer.onKey(MAX_SYSTEM_MEMORY_BYTES); + writer.onUint64(results.s_MaxSystemMemoryUsage); writer.onKey(MODEL_BYTES_EXCEEDED); writer.onUint64(results.s_BytesExceeded); diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index 348b4162db..cc85baaf99 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -93,8 +93,8 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { std::size_t nonLimitedUsage{0}; std::size_t limitedUsage{0}; - std::size_t nonLimitedActualUsage{0}; - std::size_t limitedActualUsage{0}; + std::size_t nonLimitedMaxSystemUsage{0}; + std::size_t limitedMaxSystemUsage{0}; { // Without limits, this data set should make the models around // 1230000 bytes @@ -128,12 +128,12 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { BOOST_REQUIRE_EQUAL(uint64_t(18630), job.numRecordsHandled()); nonLimitedUsage = limits.resourceMonitor().totalMemory(); - nonLimitedActualUsage = limits.resourceMonitor().actualMemoryUsage(); + nonLimitedMaxSystemUsage = limits.resourceMonitor().maxSystemMemory(); } } LOG_DEBUG(<< "nonLimitedUsage: " << nonLimitedUsage); - LOG_DEBUG(<< "nonLimitedActualUsage: " << nonLimitedActualUsage); - BOOST_TEST_REQUIRE(nonLimitedActualUsage >= nonLimitedUsage); + LOG_DEBUG(<< "nonLimitedMaxSystemUsage: " << nonLimitedMaxSystemUsage); + BOOST_TEST_REQUIRE(nonLimitedMaxSystemUsage >= nonLimitedUsage); { // Now run the data with limiting ml::api::CAnomalyJobConfig jobConfig = CTestAnomalyJob::makeSimpleJobConfig( @@ -171,15 +171,15 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { // TODO this limit must be tightened once there is more granular // control over the model memory creation limitedUsage = limits.resourceMonitor().totalMemory(); - limitedActualUsage = limits.resourceMonitor().actualMemoryUsage(); + limitedMaxSystemUsage = 
limits.resourceMonitor().maxSystemMemory(); } LOG_TRACE(<< outputStrm.str()); LOG_DEBUG(<< "Non-limited usage: " << nonLimitedUsage << "; limited: " << limitedUsage); - LOG_DEBUG(<< "Non-limited Actual Usage: " << nonLimitedActualUsage); - LOG_DEBUG(<< "Limited Actual Usage: " << limitedActualUsage); + LOG_DEBUG(<< "Non-limited System Usage: " << nonLimitedMaxSystemUsage); + LOG_DEBUG(<< "Limited System Usage: " << limitedMaxSystemUsage); BOOST_TEST_REQUIRE(limitedUsage < nonLimitedUsage); - BOOST_TEST_REQUIRE(limitedActualUsage >= limitedUsage); + BOOST_TEST_REQUIRE(limitedMaxSystemUsage >= limitedUsage); } } @@ -384,7 +384,8 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage bytes = " << used.s_Usage); - LOG_DEBUG(<< "Actual memory usage bytes = " << used.s_ActualMemoryUsage); + LOG_DEBUG(<< "System memory usage bytes = " << used.s_SystemMemoryUsage); + LOG_DEBUG(<< "Max system memory usage bytes = " << used.s_MaxSystemMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * core::constants::BYTES_IN_MEGABYTES); BOOST_TEST_REQUIRE(used.s_ByFields > testParam.s_ExpectedByFields); @@ -394,7 +395,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedByMemoryUsageRelativeErrorDivisor); - BOOST_TEST_REQUIRE(used.s_Usage <= used.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(used.s_Usage <= used.s_MaxSystemMemoryUsage); } LOG_DEBUG(<< "**** Test partition with bucketLength = " << testParam.s_BucketLength @@ -439,7 +440,8 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << 
used.s_Usage); - LOG_DEBUG(<< "Actual memory usage = " << used.s_ActualMemoryUsage); + LOG_DEBUG(<< "System memory usage = " << used.s_SystemMemoryUsage); + LOG_DEBUG(<< "Max system memory usage = " << used.s_MaxSystemMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * 1024 * 1024); BOOST_TEST_REQUIRE(used.s_PartitionFields >= testParam.s_ExpectedPartitionFields); BOOST_TEST_REQUIRE(used.s_PartitionFields < 450); @@ -449,7 +451,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedPartitionUsageRelativeErrorDivisor); - BOOST_TEST_REQUIRE(used.s_Usage <= used.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(used.s_Usage <= used.s_MaxSystemMemoryUsage); } LOG_DEBUG(<< "**** Test over with bucketLength = " << testParam.s_BucketLength @@ -492,7 +494,8 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# over = " << used.s_OverFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); - LOG_DEBUG(<< "Actual memory usage = " << used.s_ActualMemoryUsage); + LOG_DEBUG(<< "System memory usage = " << used.s_SystemMemoryUsage); + LOG_DEBUG(<< "Max system memory usage = " << used.s_MaxSystemMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * 1024 * 1024); BOOST_TEST_REQUIRE(used.s_OverFields > testParam.s_ExpectedOverFields); BOOST_TEST_REQUIRE(used.s_OverFields <= 9000); @@ -500,7 +503,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedOverUsageRelativeErrorDivisor); - BOOST_TEST_REQUIRE(used.s_Usage <= used.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(used.s_Usage <= used.s_MaxSystemMemoryUsage); } } } diff --git 
a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index 95c5e319b8..82c11c31f2 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1728,22 +1728,23 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { resourceUsage.s_AdjustedUsage = 2; resourceUsage.s_PeakUsage = 3; resourceUsage.s_AdjustedPeakUsage = 4; - resourceUsage.s_ActualMemoryUsage = 5; - resourceUsage.s_ByFields = 6; - resourceUsage.s_PartitionFields = 7; - resourceUsage.s_OverFields = 8; - resourceUsage.s_AllocationFailures = 9; + resourceUsage.s_SystemMemoryUsage = 5; + resourceUsage.s_MaxSystemMemoryUsage = 6; + resourceUsage.s_ByFields = 7; + resourceUsage.s_PartitionFields = 8; + resourceUsage.s_OverFields = 9; + resourceUsage.s_AllocationFailures = 10; resourceUsage.s_MemoryStatus = ml::model_t::E_MemoryStatusHardLimit; - resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisActualMemoryUsageBytes; - resourceUsage.s_BucketStartTime = 10; - resourceUsage.s_BytesExceeded = 11; - resourceUsage.s_BytesMemoryLimit = 12; - resourceUsage.s_OverallCategorizerStats.s_CategorizedMessages = 13; - resourceUsage.s_OverallCategorizerStats.s_TotalCategories = 14; - resourceUsage.s_OverallCategorizerStats.s_FrequentCategories = 15; - resourceUsage.s_OverallCategorizerStats.s_RareCategories = 16; - resourceUsage.s_OverallCategorizerStats.s_DeadCategories = 17; - resourceUsage.s_OverallCategorizerStats.s_MemoryCategorizationFailures = 18; + resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisSystemMemoryBytes; + resourceUsage.s_BucketStartTime = 11; + resourceUsage.s_BytesExceeded = 12; + resourceUsage.s_BytesMemoryLimit = 13; + resourceUsage.s_OverallCategorizerStats.s_CategorizedMessages = 14; + resourceUsage.s_OverallCategorizerStats.s_TotalCategories = 15; + resourceUsage.s_OverallCategorizerStats.s_FrequentCategories = 16; + resourceUsage.s_OverallCategorizerStats.s_RareCategories = 17; + 
resourceUsage.s_OverallCategorizerStats.s_DeadCategories = 18; + resourceUsage.s_OverallCategorizerStats.s_MemoryCategorizationFailures = 19; resourceUsage.s_OverallCategorizerStats.s_CategorizationStatus = ml::model_t::E_CategorizationStatusWarn; @@ -1771,47 +1772,50 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { BOOST_REQUIRE_EQUAL(2, sizeStats.at("model_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL(4, sizeStats.at("peak_model_bytes").to_number()); - BOOST_TEST_REQUIRE(sizeStats.contains("actual_memory_usage_bytes")); + BOOST_TEST_REQUIRE(sizeStats.contains("system_memory_bytes")); BOOST_REQUIRE_EQUAL( - 5, sizeStats.at("actual_memory_usage_bytes").to_number()); + 5, sizeStats.at("system_memory_bytes").to_number()); + BOOST_TEST_REQUIRE(sizeStats.contains("max_system_memory_bytes")); + BOOST_REQUIRE_EQUAL( + 6, sizeStats.at("max_system_memory_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_by_field_count")); - BOOST_REQUIRE_EQUAL(6, sizeStats.at("total_by_field_count").to_number()); + BOOST_REQUIRE_EQUAL(7, sizeStats.at("total_by_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_partition_field_count")); BOOST_REQUIRE_EQUAL( - 7, sizeStats.at("total_partition_field_count").to_number()); + 8, sizeStats.at("total_partition_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_over_field_count")); - BOOST_REQUIRE_EQUAL(8, sizeStats.at("total_over_field_count").to_number()); + BOOST_REQUIRE_EQUAL(9, sizeStats.at("total_over_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("bucket_allocation_failures_count")); BOOST_REQUIRE_EQUAL( - 9, sizeStats.at("bucket_allocation_failures_count").to_number()); + 10, sizeStats.at("bucket_allocation_failures_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("timestamp")); - BOOST_REQUIRE_EQUAL(10000, sizeStats.at("timestamp").to_number()); + BOOST_REQUIRE_EQUAL(11000, 
sizeStats.at("timestamp").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("memory_status")); BOOST_REQUIRE_EQUAL("hard_limit", sizeStats.at("memory_status").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("assignment_memory_basis")); - BOOST_REQUIRE_EQUAL("actual_memory_usage_bytes", + BOOST_REQUIRE_EQUAL("system_memory_bytes", sizeStats.at("assignment_memory_basis").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("log_time")); std::int64_t nowMs{ml::core::CTimeUtils::nowMs()}; BOOST_TEST_REQUIRE(nowMs >= sizeStats.at("log_time").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("model_bytes_exceeded")); - BOOST_REQUIRE_EQUAL(11, sizeStats.at("model_bytes_exceeded").to_number()); + BOOST_REQUIRE_EQUAL(12, sizeStats.at("model_bytes_exceeded").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("model_bytes_memory_limit")); BOOST_REQUIRE_EQUAL( - 12, sizeStats.at("model_bytes_memory_limit").to_number()); + 13, sizeStats.at("model_bytes_memory_limit").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("categorized_doc_count")); - BOOST_REQUIRE_EQUAL(13, sizeStats.at("categorized_doc_count").to_number()); + BOOST_REQUIRE_EQUAL(14, sizeStats.at("categorized_doc_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_category_count")); - BOOST_REQUIRE_EQUAL(14, sizeStats.at("total_category_count").to_number()); + BOOST_REQUIRE_EQUAL(15, sizeStats.at("total_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("frequent_category_count")); BOOST_REQUIRE_EQUAL( - 15, sizeStats.at("frequent_category_count").to_number()); + 16, sizeStats.at("frequent_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("rare_category_count")); - BOOST_REQUIRE_EQUAL(16, sizeStats.at("rare_category_count").to_number()); + BOOST_REQUIRE_EQUAL(17, sizeStats.at("rare_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("dead_category_count")); - BOOST_REQUIRE_EQUAL(17, sizeStats.at("dead_category_count").to_number()); + 
BOOST_REQUIRE_EQUAL(18, sizeStats.at("dead_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("failed_category_count")); - BOOST_REQUIRE_EQUAL(18, sizeStats.at("failed_category_count").to_number()); + BOOST_REQUIRE_EQUAL(19, sizeStats.at("failed_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("categorization_status")); BOOST_REQUIRE_EQUAL("warn", sizeStats.at("categorization_status").as_string()); } diff --git a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc index eb4d382b9a..c3c32aa022 100644 --- a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc +++ b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc @@ -36,7 +36,8 @@ BOOST_AUTO_TEST_CASE(testWrite) { 20000, // bytes used (adjusted) 30000, // peak bytes used 60000, // peak bytes used (adjusted) - 409600, // Actual memory used (max rss) + 409600, // System memory used (rss) + 413696, // Max system memory used (max rss) 3, // # by fields 1, // # partition fields 150, // # over fields @@ -117,6 +118,10 @@ BOOST_AUTO_TEST_CASE(testWrite) { BOOST_TEST_REQUIRE(modelSizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL( 60000, modelSizeStats.at("peak_model_bytes").to_number()); + BOOST_REQUIRE_EQUAL( + 409600, modelSizeStats.at("system_memory_bytes").to_number()); + BOOST_REQUIRE_EQUAL( + 413696, modelSizeStats.at("max_system_memory_bytes").to_number()); BOOST_TEST_REQUIRE(modelSizeStats.contains("total_by_field_count")); BOOST_REQUIRE_EQUAL( 3, modelSizeStats.at("total_by_field_count").to_number()); diff --git a/lib/core/CProcessStats_Linux.cc b/lib/core/CProcessStats_Linux.cc index c858b4e589..6511a209f1 100644 --- a/lib/core/CProcessStats_Linux.cc +++ b/lib/core/CProcessStats_Linux.cc @@ -76,6 +76,8 @@ std::size_t CProcessStats::residentSetSize() { } } + CProgramCounters::counter(counter_t::E_TSADResidentSetSize) = rss; + return rss; } diff --git a/lib/core/CProcessStats_MacOSX.cc b/lib/core/CProcessStats_MacOSX.cc index 
c3edc85424..855b0bdca9 100644 --- a/lib/core/CProcessStats_MacOSX.cc +++ b/lib/core/CProcessStats_MacOSX.cc @@ -23,6 +23,7 @@ namespace core { std::size_t CProcessStats::residentSetSize() { // not supported on osx + CProgramCounters::counter(counter_t::E_TSADResidentSetSize) = 0; return 0; } diff --git a/lib/core/CProcessStats_Windows.cc b/lib/core/CProcessStats_Windows.cc index d91db5e3c9..e4e2baa163 100644 --- a/lib/core/CProcessStats_Windows.cc +++ b/lib/core/CProcessStats_Windows.cc @@ -29,7 +29,11 @@ std::size_t CProcessStats::residentSetSize() { return 0; } - return static_cast(stats.WorkingSetSize); + auto workingSetSize = static_cast(stats.WorkingSetSize); + + CProgramCounters::counter(counter_t::E_TSADResidentSetSize) = workingSetSize; + + return workingSetSize; } std::size_t CProcessStats::maxResidentSetSize() { diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index a0b74ed6f3..69354f5629 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -383,7 +383,8 @@ CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { res.s_PeakUsage = static_cast( core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage)); res.s_AdjustedPeakUsage = this->adjustedUsage(res.s_PeakUsage); - res.s_ActualMemoryUsage = core::CProcessStats::maxResidentSetSize(); + res.s_SystemMemoryUsage = core::CProcessStats::residentSetSize(); + res.s_MaxSystemMemoryUsage = core::CProcessStats::maxResidentSetSize(); res.s_BytesMemoryLimit = this->persistenceMemoryIncreaseFactor() * m_ByteLimitHigh; res.s_BytesExceeded = m_CurrentBytesExceeded; res.s_MemoryStatus = m_MemoryStatus; @@ -493,9 +494,12 @@ std::size_t CResourceMonitor::totalMemory() const { counter_t::E_TSADOutputMemoryAllocatorUsage)); } -std::size_t CResourceMonitor::actualMemoryUsage() const { - return core::CProcessStats::maxResidentSetSize(); +std::size_t CResourceMonitor::systemMemory() const { + return core::CProcessStats::residentSetSize(); } 
+std::size_t CResourceMonitor::maxSystemMemory() const { + return core::CProcessStats::maxResidentSetSize(); +} } // model } // ml diff --git a/lib/model/ModelTypes.cc b/lib/model/ModelTypes.cc index a9140a66b3..1bddf8bae8 100644 --- a/lib/model/ModelTypes.cc +++ b/lib/model/ModelTypes.cc @@ -1733,8 +1733,10 @@ std::string print(EAssignmentMemoryBasis assignmentMemoryBasis) { return "current_model_bytes"; case E_AssignmentBasisPeakModelBytes: return "peak_model_bytes"; - case E_AssignmentBasisActualMemoryUsageBytes: - return "actual_memory_usage_bytes"; + case E_AssignmentBasisSystemMemoryBytes: + return "system_memory_bytes"; + case E_AssignmentBasisMaxSystemMemoryBytes: + return "max_system_memory_bytes"; } return "-"; } diff --git a/lib/model/unittest/CResourceMonitorTest.cc b/lib/model/unittest/CResourceMonitorTest.cc index 193746addc..c17c644c80 100644 --- a/lib/model/unittest/CResourceMonitorTest.cc +++ b/lib/model/unittest/CResourceMonitorTest.cc @@ -549,9 +549,9 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory, m_ReportedModelSizeStats.s_PeakUsage); - BOOST_TEST_REQUIRE(baseTotalMemory <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(baseTotalMemory <= m_ReportedModelSizeStats.s_SystemMemoryUsage); BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= - m_ReportedModelSizeStats.s_ActualMemoryUsage); + m_ReportedModelSizeStats.s_SystemMemoryUsage); monitor.addExtraMemory(100); @@ -560,9 +560,9 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); - BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); 
BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= - m_ReportedModelSizeStats.s_ActualMemoryUsage); + m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); monitor.addExtraMemory(-50); @@ -571,9 +571,9 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 50, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); - BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= - m_ReportedModelSizeStats.s_ActualMemoryUsage); + m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); monitor.addExtraMemory(100); @@ -582,9 +582,9 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 150, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 150, m_ReportedModelSizeStats.s_PeakUsage); - BOOST_TEST_REQUIRE(baseTotalMemory + 150 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(baseTotalMemory + 150 <= m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= - m_ReportedModelSizeStats.s_ActualMemoryUsage); + m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); } BOOST_FIXTURE_TEST_CASE(testUpdateMoments, CTestFixture) { From 5ae22cb50c89f8176df8eca696684d294f16cf69 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 11 Apr 2025 11:08:27 +1200 Subject: [PATCH 07/16] Update bin/autodetect/Main.cc Co-authored-by: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> --- bin/autodetect/Main.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index 11f7674634..93cb77e5d7 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -155,7 +155,7 @@ int main(int argc, char** argv) { 
cancellerThread.stop(); LOG_DEBUG(<< "Max Resident Set Size: " << ml::core::CProcessStats::maxResidentSetSize()); - +LOG_DEBUG(<< "Resident Set Size: " << ml::core::CProcessStats::residentSetSize()); // Log the program version immediately after reconfiguring the logger. This // must be done from the program, and NOT a shared library, as each program // statically links its own version library. From fe6f1fac7df9daf4b6a7a8f42c0ea3ce0f668a32 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 11 Apr 2025 11:08:49 +1200 Subject: [PATCH 08/16] Update include/model/CResourceMonitor.h Co-authored-by: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> --- include/model/CResourceMonitor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index 8cb9d5f3ac..d1815efa81 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -182,7 +182,7 @@ class MODEL_EXPORT CResourceMonitor { //! Returns the sum of used memory plus any extra memory std::size_t totalMemory() const; - //! Returns the current physical memory of the process as reported by the system + //! Returns the current physical memory of the process (rss) as reported by the system std::size_t systemMemory() const; //! 
Returns the maximum physical memory of the processs as reported by the system From 582430ea4fbfbd0d5cf486af35fc4f469e0c50d0 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 11 Apr 2025 11:09:09 +1200 Subject: [PATCH 09/16] Update include/model/CResourceMonitor.h Co-authored-by: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> --- include/model/CResourceMonitor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index d1815efa81..ff156d007d 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -185,7 +185,7 @@ class MODEL_EXPORT CResourceMonitor { //! Returns the current physical memory of the process (rss) as reported by the system std::size_t systemMemory() const; - //! Returns the maximum physical memory of the processs as reported by the system + //! Returns the maximum physical memory of the process (max rss) as reported by the system std::size_t maxSystemMemory() const; private: From 9476edee78422a088b9c94d104680203d1c21bf2 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 11 Apr 2025 14:59:43 +1200 Subject: [PATCH 10/16] Attend to review comments --- bin/autodetect/Main.cc | 6 +++--- include/core/CProgramCounters.h | 8 ++++---- include/model/CResourceMonitor.h | 2 +- include/model/ModelTypes.h | 4 +--- lib/api/unittest/CJsonOutputWriterTest.cc | 6 +++--- lib/core/CProcessStats_Linux.cc | 4 ++-- lib/core/CProcessStats_MacOSX.cc | 4 ++-- lib/core/CProcessStats_Windows.cc | 4 ++-- lib/model/ModelTypes.cc | 4 ---- 9 files changed, 18 insertions(+), 24 deletions(-) diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index 11f7674634..6037d4ba43 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -85,8 +85,8 @@ int main(int argc, char** argv) { ml::counter_t::E_TSADNumberPrunedItems, ml::counter_t::E_TSADAssignmentMemoryBasis, ml::counter_t::E_TSADOutputMemoryAllocatorUsage, - 
ml::counter_t::E_TSADResidentSetSize, - ml::counter_t::E_TSADMaxResidentSetSize}; + ml::counter_t::E_TSADSystemMemoryUsage, + ml::counter_t::E_TSADMaxSystemMemoryUsage}; ml::core::CProgramCounters::registerProgramCounterTypes(counters); @@ -154,7 +154,7 @@ int main(int argc, char** argv) { } cancellerThread.stop(); - LOG_DEBUG(<< "Max Resident Set Size: " << ml::core::CProcessStats::maxResidentSetSize()); + LOG_DEBUG(<< "Resident Set Size: " << ml::core::CProcessStats::residentSetSize()); // Log the program version immediately after reconfiguring the logger. This // must be done from the program, and NOT a shared library, as each program diff --git a/include/core/CProgramCounters.h b/include/core/CProgramCounters.h index 9bad82389a..fd9c949a5d 100644 --- a/include/core/CProgramCounters.h +++ b/include/core/CProgramCounters.h @@ -113,10 +113,10 @@ enum ECounterTypes { E_TSADOutputMemoryAllocatorUsage = 30, //! The resident set size of the process, in bytes. - E_TSADResidentSetSize = 31, + E_TSADSystemMemoryUsage = 31, //! The maximum resident set size of the process, in bytes. 
- E_TSADMaxResidentSetSize = 32, + E_TSADMaxSystemMemoryUsage = 32, // Data Frame Outlier Detection @@ -361,9 +361,9 @@ class CORE_EXPORT CProgramCounters { "Which option is being used to get model memory for node assignment?"}, {counter_t::E_TSADOutputMemoryAllocatorUsage, "E_TSADOutputMemoryAllocatorUsage", "The amount of memory used to output JSON documents, in bytes."}, - {counter_t::E_TSADResidentSetSize, "E_TSADResidentSetSize", + {counter_t::E_TSADSystemMemoryUsage, "E_TSADResidentSetSize", "The resident set size of the process, in bytes"}, - {counter_t::E_TSADMaxResidentSetSize, "E_TSADMaxResidentSetSize", + {counter_t::E_TSADMaxSystemMemoryUsage, "E_TSADMaxResidentSetSize", "The maximum resident set size of the process, in bytes"}, {counter_t::E_DFOEstimatedPeakMemoryUsage, "E_DFOEstimatedPeakMemoryUsage", "The upfront estimate of the peak memory outlier detection would use"}, diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index 8cb9d5f3ac..cd8ea1ad58 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -182,7 +182,7 @@ class MODEL_EXPORT CResourceMonitor { //! Returns the sum of used memory plus any extra memory std::size_t totalMemory() const; - //! Returns the current physical memory of the process as reported by the system + //! Returns the current physical memory (rss) of the process as reported by the system std::size_t systemMemory() const; //! 
Returns the maximum physical memory of the processs as reported by the system diff --git a/include/model/ModelTypes.h b/include/model/ModelTypes.h index 66674ed39d..9494935b92 100644 --- a/include/model/ModelTypes.h +++ b/include/model/ModelTypes.h @@ -719,9 +719,7 @@ enum EAssignmentMemoryBasis { E_AssignmentBasisUnknown = 0, //!< Decision made in Java code E_AssignmentBasisModelMemoryLimit = 1, //!< Use model memory limit E_AssignmentBasisCurrentModelBytes = 2, //!< Use current actual model size - E_AssignmentBasisPeakModelBytes = 3, //!< Use highest ever actual model size - E_AssignmentBasisSystemMemoryBytes = 4, //!< Use the current system memory size - E_AssignmentBasisMaxSystemMemoryBytes = 5 //!< Use the highest ever system memory size + E_AssignmentBasisPeakModelBytes = 3 }; //! Get a string description of \p assignmentMemoryBasis. diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index 82c11c31f2..495fd77a52 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1717,7 +1717,7 @@ BOOST_AUTO_TEST_CASE(testPersistNormalizer) { BOOST_TEST_REQUIRE(quantileState.contains("timestamp")); } -BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { +BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { std::ostringstream sstream; { ml::core::CJsonOutputStreamWrapper outputStream(sstream); @@ -1735,7 +1735,7 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { resourceUsage.s_OverFields = 9; resourceUsage.s_AllocationFailures = 10; resourceUsage.s_MemoryStatus = ml::model_t::E_MemoryStatusHardLimit; - resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisSystemMemoryBytes; + resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisPeakModelBytes; resourceUsage.s_BucketStartTime = 11; resourceUsage.s_BytesExceeded = 12; resourceUsage.s_BytesMemoryLimit = 13; @@ -1793,7 +1793,7 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { 
BOOST_TEST_REQUIRE(sizeStats.contains("memory_status")); BOOST_REQUIRE_EQUAL("hard_limit", sizeStats.at("memory_status").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("assignment_memory_basis")); - BOOST_REQUIRE_EQUAL("system_memory_bytes", + BOOST_REQUIRE_EQUAL("peak_model_bytes", sizeStats.at("assignment_memory_basis").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("log_time")); std::int64_t nowMs{ml::core::CTimeUtils::nowMs()}; diff --git a/lib/core/CProcessStats_Linux.cc b/lib/core/CProcessStats_Linux.cc index 6511a209f1..ecaaf7f272 100644 --- a/lib/core/CProcessStats_Linux.cc +++ b/lib/core/CProcessStats_Linux.cc @@ -76,7 +76,7 @@ std::size_t CProcessStats::residentSetSize() { } } - CProgramCounters::counter(counter_t::E_TSADResidentSetSize) = rss; + CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = rss; return rss; } @@ -92,7 +92,7 @@ std::size_t CProcessStats::maxResidentSetSize() { // ru_maxrss is in kilobytes auto maxRSS = static_cast(rusage.ru_maxrss * 1024L); - CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = maxRSS; + CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = maxRSS; return maxRSS; } diff --git a/lib/core/CProcessStats_MacOSX.cc b/lib/core/CProcessStats_MacOSX.cc index 855b0bdca9..d308463c4e 100644 --- a/lib/core/CProcessStats_MacOSX.cc +++ b/lib/core/CProcessStats_MacOSX.cc @@ -23,7 +23,7 @@ namespace core { std::size_t CProcessStats::residentSetSize() { // not supported on osx - CProgramCounters::counter(counter_t::E_TSADResidentSetSize) = 0; + CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = 0; return 0; } @@ -35,7 +35,7 @@ std::size_t CProcessStats::maxResidentSetSize() { return 0; } auto maxRSS = static_cast(rusage.ru_maxrss); - CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = maxRSS; + CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = maxRSS; // ru_maxrss is in bytes return maxRSS; } diff --git a/lib/core/CProcessStats_Windows.cc 
b/lib/core/CProcessStats_Windows.cc index e4e2baa163..cc88daad75 100644 --- a/lib/core/CProcessStats_Windows.cc +++ b/lib/core/CProcessStats_Windows.cc @@ -31,7 +31,7 @@ std::size_t CProcessStats::residentSetSize() { auto workingSetSize = static_cast(stats.WorkingSetSize); - CProgramCounters::counter(counter_t::E_TSADResidentSetSize) = workingSetSize; + CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = workingSetSize; return workingSetSize; } @@ -45,7 +45,7 @@ std::size_t CProcessStats::maxResidentSetSize() { auto peakWorkingSetSize = static_cast(stats.PeakWorkingSetSize); - CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = peakWorkingSetSize; + CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = peakWorkingSetSize; return peakWorkingSetSize; } diff --git a/lib/model/ModelTypes.cc b/lib/model/ModelTypes.cc index 1bddf8bae8..2fab1d1c2a 100644 --- a/lib/model/ModelTypes.cc +++ b/lib/model/ModelTypes.cc @@ -1733,10 +1733,6 @@ std::string print(EAssignmentMemoryBasis assignmentMemoryBasis) { return "current_model_bytes"; case E_AssignmentBasisPeakModelBytes: return "peak_model_bytes"; - case E_AssignmentBasisSystemMemoryBytes: - return "system_memory_bytes"; - case E_AssignmentBasisMaxSystemMemoryBytes: - return "max_system_memory_bytes"; } return "-"; } From 475fef1aec8111be815bae561bdc1b82d7bb37fb Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 11 Apr 2025 15:12:47 +1200 Subject: [PATCH 11/16] Formatting --- lib/api/unittest/CJsonOutputWriterTest.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index 495fd77a52..ea16f9a19d 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1717,7 +1717,7 @@ BOOST_AUTO_TEST_CASE(testPersistNormalizer) { BOOST_TEST_REQUIRE(quantileState.contains("timestamp")); } -BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { 
+BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { std::ostringstream sstream; { ml::core::CJsonOutputStreamWrapper outputStream(sstream); @@ -1773,8 +1773,7 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { BOOST_TEST_REQUIRE(sizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL(4, sizeStats.at("peak_model_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("system_memory_bytes")); - BOOST_REQUIRE_EQUAL( - 5, sizeStats.at("system_memory_bytes").to_number()); + BOOST_REQUIRE_EQUAL(5, sizeStats.at("system_memory_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("max_system_memory_bytes")); BOOST_REQUIRE_EQUAL( 6, sizeStats.at("max_system_memory_bytes").to_number()); From 6945d3fd1d439581526320677a5009d8425c5add Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Tue, 27 May 2025 15:50:37 +1200 Subject: [PATCH 12/16] Attend to failing unit tests (hopefully) * Address failing unit tests * More accurate, meaningful description of new program counters --- include/core/CProgramCounters.h | 8 ++++---- include/model/CResourceMonitor.h | 4 ++-- include/model/ModelTypes.h | 2 +- lib/api/CAnomalyJob.cc | 2 ++ lib/core/CProcessStats_Linux.cc | 8 +------- lib/core/CProcessStats_MacOSX.cc | 6 ++---- lib/core/CProcessStats_Windows.cc | 12 ++---------- lib/model/CResourceMonitor.cc | 4 ++-- 8 files changed, 16 insertions(+), 30 deletions(-) diff --git a/include/core/CProgramCounters.h b/include/core/CProgramCounters.h index fd9c949a5d..4fde6670ff 100644 --- a/include/core/CProgramCounters.h +++ b/include/core/CProgramCounters.h @@ -361,10 +361,10 @@ class CORE_EXPORT CProgramCounters { "Which option is being used to get model memory for node assignment?"}, {counter_t::E_TSADOutputMemoryAllocatorUsage, "E_TSADOutputMemoryAllocatorUsage", "The amount of memory used to output JSON documents, in bytes."}, - {counter_t::E_TSADSystemMemoryUsage, "E_TSADResidentSetSize", - "The resident set size of the process, in bytes"}, - {counter_t::E_TSADMaxSystemMemoryUsage, 
"E_TSADMaxResidentSetSize", - "The maximum resident set size of the process, in bytes"}, + {counter_t::E_TSADSystemMemoryUsage, "E_TSADSystemMemoryUsage", + "The amount of system memory used by the process, in bytes"}, + {counter_t::E_TSADMaxSystemMemoryUsage, "E_TSADMaxSystemMemoryUsage", + "The maximum amount of system memory used by the process, in bytes"}, {counter_t::E_DFOEstimatedPeakMemoryUsage, "E_DFOEstimatedPeakMemoryUsage", "The upfront estimate of the peak memory outlier detection would use"}, {counter_t::E_DFOPeakMemoryUsage, "E_DFOPeakMemoryUsage", "The peak memory outlier detection used"}, diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index ff156d007d..cea75148ed 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -183,10 +183,10 @@ class MODEL_EXPORT CResourceMonitor { std::size_t totalMemory() const; //! Returns the current physical memory of the process (rss) as reported by the system - std::size_t systemMemory() const; + static std::size_t systemMemory() ; //! Returns the maximum physical memory of the process (max rss) as reported by the system - std::size_t maxSystemMemory() const; + static std::size_t maxSystemMemory(); private: using TMonitoredResourcePtrSizeUMap = diff --git a/include/model/ModelTypes.h b/include/model/ModelTypes.h index 9494935b92..acbcc14c04 100644 --- a/include/model/ModelTypes.h +++ b/include/model/ModelTypes.h @@ -719,7 +719,7 @@ enum EAssignmentMemoryBasis { E_AssignmentBasisUnknown = 0, //!< Decision made in Java code E_AssignmentBasisModelMemoryLimit = 1, //!< Use model memory limit E_AssignmentBasisCurrentModelBytes = 2, //!< Use current actual model size - E_AssignmentBasisPeakModelBytes = 3 + E_AssignmentBasisPeakModelBytes = 3 //!< Use highest ever actual model size }; //! Get a string description of \p assignmentMemoryBasis. 
diff --git a/lib/api/CAnomalyJob.cc b/lib/api/CAnomalyJob.cc index 3a8c06be2a..cc590d370b 100644 --- a/lib/api/CAnomalyJob.cc +++ b/lib/api/CAnomalyJob.cc @@ -205,6 +205,8 @@ bool CAnomalyJob::handleRecord(const TStrStrUMap& dataRowFields, TOptionalTime t } ++core::CProgramCounters::counter(counter_t::E_TSADNumberApiRecordsHandled); + core::CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = model::CResourceMonitor::systemMemory(); + core::CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = model::CResourceMonitor::maxSystemMemory(); ++m_NumRecordsHandled; m_LatestRecordTime = std::max(m_LatestRecordTime, *time); diff --git a/lib/core/CProcessStats_Linux.cc b/lib/core/CProcessStats_Linux.cc index ecaaf7f272..e3635e279f 100644 --- a/lib/core/CProcessStats_Linux.cc +++ b/lib/core/CProcessStats_Linux.cc @@ -76,8 +76,6 @@ std::size_t CProcessStats::residentSetSize() { } } - CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = rss; - return rss; } @@ -90,11 +88,7 @@ std::size_t CProcessStats::maxResidentSetSize() { } // ru_maxrss is in kilobytes - auto maxRSS = static_cast(rusage.ru_maxrss * 1024L); - - CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = maxRSS; - - return maxRSS; + return static_cast(rusage.ru_maxrss * 1024L); } } } diff --git a/lib/core/CProcessStats_MacOSX.cc b/lib/core/CProcessStats_MacOSX.cc index d308463c4e..ce86647aed 100644 --- a/lib/core/CProcessStats_MacOSX.cc +++ b/lib/core/CProcessStats_MacOSX.cc @@ -23,7 +23,6 @@ namespace core { std::size_t CProcessStats::residentSetSize() { // not supported on osx - CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = 0; return 0; } @@ -34,10 +33,9 @@ std::size_t CProcessStats::maxResidentSetSize() { LOG_DEBUG(<< "failed to get resource usage(getrusage): " << ::strerror(errno)); return 0; } - auto maxRSS = static_cast(rusage.ru_maxrss); - CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = maxRSS; + // ru_maxrss is in bytes - 
return maxRSS; + return static_cast(rusage.ru_maxrss);; } } } diff --git a/lib/core/CProcessStats_Windows.cc b/lib/core/CProcessStats_Windows.cc index cc88daad75..933952e1f2 100644 --- a/lib/core/CProcessStats_Windows.cc +++ b/lib/core/CProcessStats_Windows.cc @@ -29,11 +29,7 @@ std::size_t CProcessStats::residentSetSize() { return 0; } - auto workingSetSize = static_cast(stats.WorkingSetSize); - - CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = workingSetSize; - - return workingSetSize; + return static_cast(stats.WorkingSetSize); } std::size_t CProcessStats::maxResidentSetSize() { @@ -43,11 +39,7 @@ std::size_t CProcessStats::maxResidentSetSize() { return 0; } - auto peakWorkingSetSize = static_cast(stats.PeakWorkingSetSize); - - CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = peakWorkingSetSize; - - return peakWorkingSetSize; + return static_cast(stats.PeakWorkingSetSize); } } } diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index 69354f5629..7d71df1afb 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -494,11 +494,11 @@ std::size_t CResourceMonitor::totalMemory() const { counter_t::E_TSADOutputMemoryAllocatorUsage)); } -std::size_t CResourceMonitor::systemMemory() const { +std::size_t CResourceMonitor::systemMemory() { return core::CProcessStats::residentSetSize(); } -std::size_t CResourceMonitor::maxSystemMemory() const { +std::size_t CResourceMonitor::maxSystemMemory() { return core::CProcessStats::maxResidentSetSize(); } } // model From 3b69b72f8227388ed654ee0de9d381adec4adf88 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Tue, 27 May 2025 16:11:25 +1200 Subject: [PATCH 13/16] Formatting.. 
grr --- include/model/CResourceMonitor.h | 2 +- lib/api/CAnomalyJob.cc | 6 ++++-- lib/core/CProcessStats_MacOSX.cc | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index cea75148ed..68758d4f09 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -183,7 +183,7 @@ class MODEL_EXPORT CResourceMonitor { std::size_t totalMemory() const; //! Returns the current physical memory of the process (rss) as reported by the system - static std::size_t systemMemory() ; + static std::size_t systemMemory(); //! Returns the maximum physical memory of the process (max rss) as reported by the system static std::size_t maxSystemMemory(); diff --git a/lib/api/CAnomalyJob.cc b/lib/api/CAnomalyJob.cc index cc590d370b..a334fe34dc 100644 --- a/lib/api/CAnomalyJob.cc +++ b/lib/api/CAnomalyJob.cc @@ -205,8 +205,10 @@ bool CAnomalyJob::handleRecord(const TStrStrUMap& dataRowFields, TOptionalTime t } ++core::CProgramCounters::counter(counter_t::E_TSADNumberApiRecordsHandled); - core::CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = model::CResourceMonitor::systemMemory(); - core::CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = model::CResourceMonitor::maxSystemMemory(); + core::CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = + model::CResourceMonitor::systemMemory(); + core::CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = + model::CResourceMonitor::maxSystemMemory(); ++m_NumRecordsHandled; m_LatestRecordTime = std::max(m_LatestRecordTime, *time); diff --git a/lib/core/CProcessStats_MacOSX.cc b/lib/core/CProcessStats_MacOSX.cc index ce86647aed..61dc9fca66 100644 --- a/lib/core/CProcessStats_MacOSX.cc +++ b/lib/core/CProcessStats_MacOSX.cc @@ -35,7 +35,7 @@ std::size_t CProcessStats::maxResidentSetSize() { } // ru_maxrss is in bytes - return static_cast(rusage.ru_maxrss);; + return 
static_cast(rusage.ru_maxrss); } } } From 1bafa8cdf1663636f7bb59ffbd2f31488e90835b Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Wed, 4 Jun 2025 16:24:50 +1200 Subject: [PATCH 14/16] On Linux only, use the value of the system memory usage (max resident set size) for the "model memory usage" and "peak model memory usage" fields reported to Java. --- include/model/CSystemMemoryUsage.h | 43 +++++++++++++++++++ lib/api/CModelSizeStatsJsonWriter.cc | 6 --- lib/api/unittest/CJsonOutputWriterTest.cc | 5 --- .../unittest/CModelSnapshotJsonWriterTest.cc | 4 -- lib/model/CMakeLists.txt | 1 + lib/model/CResourceMonitor.cc | 6 ++- lib/model/CSystemMemoryUsage.cc | 23 ++++++++++ lib/model/CSystemMemoryUsage_Linux.cc | 24 +++++++++++ 8 files changed, 95 insertions(+), 17 deletions(-) create mode 100644 include/model/CSystemMemoryUsage.h create mode 100644 lib/model/CSystemMemoryUsage.cc create mode 100644 lib/model/CSystemMemoryUsage_Linux.cc diff --git a/include/model/CSystemMemoryUsage.h b/include/model/CSystemMemoryUsage.h new file mode 100644 index 0000000000..62adea2f29 --- /dev/null +++ b/include/model/CSystemMemoryUsage.h @@ -0,0 +1,43 @@ +/* +* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ +#ifndef INCLUDED_ml_model_CSystemMemoryUsage_h +#define INCLUDED_ml_model_CSystemMemoryUsage_h + +#include + +#include + +namespace ml { +namespace model { + +//! \brief Determines the memory used by the current process. +//! +//! DESCRIPTION:\n +//! 
Determines the memory used by the current process based on the operating system. +class MODEL_EXPORT CSystemMemoryUsage { +public: + CSystemMemoryUsage() = default; + ~CSystemMemoryUsage() = default; + + CSystemMemoryUsage(const CSystemMemoryUsage&) = delete; + CSystemMemoryUsage(CSystemMemoryUsage&&) = delete; + CSystemMemoryUsage& operator=(const CSystemMemoryUsage&) = delete; + CSystemMemoryUsage& operator=(CSystemMemoryUsage&&) = delete; + + //! Return the system memory used by the current process. + //! \param memSize An estimate of the process memory + //! \return The system memory usage based on the current OS. + std::size_t operator()(std::size_t memSize); +}; +} +} + +#endif //INCLUDED_ml_model_CSystemMemoryUsage_h diff --git a/lib/api/CModelSizeStatsJsonWriter.cc b/lib/api/CModelSizeStatsJsonWriter.cc index 12858914af..68b5b80c88 100644 --- a/lib/api/CModelSizeStatsJsonWriter.cc +++ b/lib/api/CModelSizeStatsJsonWriter.cc @@ -62,12 +62,6 @@ void CModelSizeStatsJsonWriter::write(const std::string& jobId, writer.onKey(PEAK_MODEL_BYTES); writer.onUint64(results.s_AdjustedPeakUsage); - writer.onKey(SYSTEM_MEMORY_BYTES); - writer.onUint64(results.s_SystemMemoryUsage); - - writer.onKey(MAX_SYSTEM_MEMORY_BYTES); - writer.onUint64(results.s_MaxSystemMemoryUsage); - writer.onKey(MODEL_BYTES_EXCEEDED); writer.onUint64(results.s_BytesExceeded); diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index ea16f9a19d..19caa4dfc1 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1772,11 +1772,6 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { BOOST_REQUIRE_EQUAL(2, sizeStats.at("model_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL(4, sizeStats.at("peak_model_bytes").to_number()); - BOOST_TEST_REQUIRE(sizeStats.contains("system_memory_bytes")); - BOOST_REQUIRE_EQUAL(5, sizeStats.at("system_memory_bytes").to_number()); 
- BOOST_TEST_REQUIRE(sizeStats.contains("max_system_memory_bytes")); - BOOST_REQUIRE_EQUAL( - 6, sizeStats.at("max_system_memory_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_by_field_count")); BOOST_REQUIRE_EQUAL(7, sizeStats.at("total_by_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_partition_field_count")); diff --git a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc index c3c32aa022..f2c22a41de 100644 --- a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc +++ b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc @@ -118,10 +118,6 @@ BOOST_AUTO_TEST_CASE(testWrite) { BOOST_TEST_REQUIRE(modelSizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL( 60000, modelSizeStats.at("peak_model_bytes").to_number()); - BOOST_REQUIRE_EQUAL( - 409600, modelSizeStats.at("system_memory_bytes").to_number()); - BOOST_REQUIRE_EQUAL( - 413696, modelSizeStats.at("max_system_memory_bytes").to_number()); BOOST_TEST_REQUIRE(modelSizeStats.contains("total_by_field_count")); BOOST_REQUIRE_EQUAL( 3, modelSizeStats.at("total_by_field_count").to_number()); diff --git a/lib/model/CMakeLists.txt b/lib/model/CMakeLists.txt index c53eec9fb0..37c8c1cc5c 100644 --- a/lib/model/CMakeLists.txt +++ b/lib/model/CMakeLists.txt @@ -75,6 +75,7 @@ ml_add_library(MlModel SHARED CSampleCounts.cc CSearchKey.cc CSimpleCountDetector.cc + CSystemMemoryUsage.cc CTokenListCategory.cc CTokenListDataCategorizerBase.cc CTokenListReverseSearchCreator.cc diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index 7d71df1afb..1e131ce0b2 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -378,11 +379,12 @@ void CResourceMonitor::sendMemoryUsageReport(core_t::TTime bucketStartTime, CResourceMonitor::SModelSizeStats CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { SModelSizeStats 
res; + CSystemMemoryUsage systemMemoryUsage; res.s_Usage = this->totalMemory(); - res.s_AdjustedUsage = this->adjustedUsage(res.s_Usage); + res.s_AdjustedUsage = systemMemoryUsage(this->adjustedUsage(res.s_Usage)); res.s_PeakUsage = static_cast( core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage)); - res.s_AdjustedPeakUsage = this->adjustedUsage(res.s_PeakUsage); + res.s_AdjustedPeakUsage = systemMemoryUsage(this->adjustedUsage(res.s_PeakUsage)); res.s_SystemMemoryUsage = core::CProcessStats::residentSetSize(); res.s_MaxSystemMemoryUsage = core::CProcessStats::maxResidentSetSize(); res.s_BytesMemoryLimit = this->persistenceMemoryIncreaseFactor() * m_ByteLimitHigh; diff --git a/lib/model/CSystemMemoryUsage.cc b/lib/model/CSystemMemoryUsage.cc new file mode 100644 index 0000000000..ae275beb34 --- /dev/null +++ b/lib/model/CSystemMemoryUsage.cc @@ -0,0 +1,23 @@ +/* +* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ + +#include + +#include + +namespace ml { +namespace model { +// On platforms other than Linux the system memory usage is that provided - the estimated size of the models. +std::size_t CSystemMemoryUsage::operator()(std::size_t memSize) { + return memSize; +} +} +} diff --git a/lib/model/CSystemMemoryUsage_Linux.cc b/lib/model/CSystemMemoryUsage_Linux.cc new file mode 100644 index 0000000000..a32026f2a5 --- /dev/null +++ b/lib/model/CSystemMemoryUsage_Linux.cc @@ -0,0 +1,24 @@ +/* +* Copyright Elasticsearch B.V. 
and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ + +#include + +#include + +namespace ml { +namespace model { +// On Linux the system memory usage is actually that determined by the OS. +// The estimated value provided is ignored. +std::size_t CSystemMemoryUsage::operator()(std::size_t /*memSize*/) { + return core::CProcessStats::maxResidentSetSize(); +} +} +} From efc311b5f432e3ed6183a5f7b74c28300973cbd5 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Wed, 4 Jun 2025 16:36:50 +1200 Subject: [PATCH 15/16] Fix copyright headers --- include/model/CSystemMemoryUsage.h | 3 ++- lib/model/CSystemMemoryUsage.cc | 2 +- lib/model/CSystemMemoryUsage_Linux.cc | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/model/CSystemMemoryUsage.h b/include/model/CSystemMemoryUsage.h index 62adea2f29..6e78091521 100644 --- a/include/model/CSystemMemoryUsage.h +++ b/include/model/CSystemMemoryUsage.h @@ -1,5 +1,5 @@ /* -* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one * or more contributor license agreements. Licensed under the Elastic License * 2.0 and the following additional limitation. Functionality enabled by the * files subject to the Elastic License 2.0 may only be used in production when @@ -8,6 +8,7 @@ * compliance with the Elastic License 2.0 and the foregoing additional * limitation. 
*/ + #ifndef INCLUDED_ml_model_CSystemMemoryUsage_h #define INCLUDED_ml_model_CSystemMemoryUsage_h diff --git a/lib/model/CSystemMemoryUsage.cc b/lib/model/CSystemMemoryUsage.cc index ae275beb34..8fd926f65b 100644 --- a/lib/model/CSystemMemoryUsage.cc +++ b/lib/model/CSystemMemoryUsage.cc @@ -1,5 +1,5 @@ /* -* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one * or more contributor license agreements. Licensed under the Elastic License * 2.0 and the following additional limitation. Functionality enabled by the * files subject to the Elastic License 2.0 may only be used in production when diff --git a/lib/model/CSystemMemoryUsage_Linux.cc b/lib/model/CSystemMemoryUsage_Linux.cc index a32026f2a5..f3b21522d5 100644 --- a/lib/model/CSystemMemoryUsage_Linux.cc +++ b/lib/model/CSystemMemoryUsage_Linux.cc @@ -1,5 +1,5 @@ /* -* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one * or more contributor license agreements. Licensed under the Elastic License * 2.0 and the following additional limitation. 
Functionality enabled by the * files subject to the Elastic License 2.0 may only be used in production when From 6be6395223ebe9ac0434c9cec57d6adada30a2a9 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Wed, 4 Jun 2025 16:50:11 +1200 Subject: [PATCH 16/16] Nits in test code --- lib/api/unittest/CAnomalyJobLimitTest.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index cc85baaf99..20e422134f 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -128,7 +128,7 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { BOOST_REQUIRE_EQUAL(uint64_t(18630), job.numRecordsHandled()); nonLimitedUsage = limits.resourceMonitor().totalMemory(); - nonLimitedMaxSystemUsage = limits.resourceMonitor().maxSystemMemory(); + nonLimitedMaxSystemUsage = model::CResourceMonitor::maxSystemMemory(); } } LOG_DEBUG(<< "nonLimitedUsage: " << nonLimitedUsage); @@ -171,7 +171,7 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { // TODO this limit must be tightened once there is more granular // control over the model memory creation limitedUsage = limits.resourceMonitor().totalMemory(); - limitedMaxSystemUsage = limits.resourceMonitor().maxSystemMemory(); + limitedMaxSystemUsage = model::CResourceMonitor::maxSystemMemory(); } LOG_TRACE(<< outputStrm.str());