diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index 904920e3db..7ca5a32337 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -83,7 +84,9 @@ int main(int argc, char** argv) { ml::counter_t::E_TSADNumberMemoryLimitModelCreationFailures, ml::counter_t::E_TSADNumberPrunedItems, ml::counter_t::E_TSADAssignmentMemoryBasis, - ml::counter_t::E_TSADOutputMemoryAllocatorUsage}; + ml::counter_t::E_TSADOutputMemoryAllocatorUsage, + ml::counter_t::E_TSADSystemMemoryUsage, + ml::counter_t::E_TSADMaxSystemMemoryUsage}; ml::core::CProgramCounters::registerProgramCounterTypes(counters); @@ -151,6 +154,8 @@ int main(int argc, char** argv) { } cancellerThread.stop(); + LOG_DEBUG(<< "Max Resident Set Size: " << ml::core::CProcessStats::maxResidentSetSize()); + LOG_DEBUG(<< "Resident Set Size: " << ml::core::CProcessStats::residentSetSize()); // Log the program version immediately after reconfiguring the logger. This // must be done from the program, and NOT a shared library, as each program // statically links its own version library. diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 3b119797c1..aeb1f7748c 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -33,6 +33,7 @@ === Enhancements * Track memory used in the hierarchical results normalizer. (See {ml-pull}2831[#2831].) +* Report the actual memory usage of the autodetect process. (See {ml-pull}2846[#2846].) === Bug Fixes diff --git a/include/core/CProgramCounters.h b/include/core/CProgramCounters.h index 3c4d10269f..4fde6670ff 100644 --- a/include/core/CProgramCounters.h +++ b/include/core/CProgramCounters.h @@ -112,6 +112,12 @@ enum ECounterTypes { //! The memory currently used by the allocators to output JSON documents, in bytes. E_TSADOutputMemoryAllocatorUsage = 30, + //! The resident set size of the process, in bytes. + E_TSADSystemMemoryUsage = 31, + + //! 
The maximum resident set size of the process, in bytes. + E_TSADMaxSystemMemoryUsage = 32, + // Data Frame Outlier Detection //! The estimated peak memory usage for outlier detection in bytes @@ -146,7 +152,7 @@ enum ECounterTypes { // Add any new values here //! This MUST be last, increment the value for every new enum added - E_LastEnumCounter = 31 + E_LastEnumCounter = 33 }; static constexpr std::size_t NUM_COUNTERS = static_cast(E_LastEnumCounter); @@ -355,6 +361,10 @@ class CORE_EXPORT CProgramCounters { "Which option is being used to get model memory for node assignment?"}, {counter_t::E_TSADOutputMemoryAllocatorUsage, "E_TSADOutputMemoryAllocatorUsage", "The amount of memory used to output JSON documents, in bytes."}, + {counter_t::E_TSADSystemMemoryUsage, "E_TSADSystemMemoryUsage", + "The amount of system memory used by the process, in bytes"}, + {counter_t::E_TSADMaxSystemMemoryUsage, "E_TSADMaxSystemMemoryUsage", + "The maximum amount of system memory used by the process, in bytes"}, {counter_t::E_DFOEstimatedPeakMemoryUsage, "E_DFOEstimatedPeakMemoryUsage", "The upfront estimate of the peak memory outlier detection would use"}, {counter_t::E_DFOPeakMemoryUsage, "E_DFOPeakMemoryUsage", "The peak memory outlier detection used"}, diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index 5c7583888b..68758d4f09 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -54,6 +54,8 @@ class MODEL_EXPORT CResourceMonitor { std::size_t s_AdjustedUsage{0}; std::size_t s_PeakUsage{0}; std::size_t s_AdjustedPeakUsage{0}; + std::size_t s_SystemMemoryUsage{0}; + std::size_t s_MaxSystemMemoryUsage{0}; std::size_t s_ByFields{0}; std::size_t s_PartitionFields{0}; std::size_t s_OverFields{0}; @@ -180,6 +182,12 @@ class MODEL_EXPORT CResourceMonitor { //! Returns the sum of used memory plus any extra memory std::size_t totalMemory() const; + //! 
Returns the current physical memory of the process (rss) as reported by the system + static std::size_t systemMemory(); + + //! Returns the maximum physical memory of the process (max rss) as reported by the system + static std::size_t maxSystemMemory(); + private: using TMonitoredResourcePtrSizeUMap = boost::unordered_map; diff --git a/include/model/CSystemMemoryUsage.h b/include/model/CSystemMemoryUsage.h new file mode 100644 index 0000000000..6e78091521 --- /dev/null +++ b/include/model/CSystemMemoryUsage.h @@ -0,0 +1,44 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ + +#ifndef INCLUDED_ml_model_CSystemMemoryUsage_h +#define INCLUDED_ml_model_CSystemMemoryUsage_h + +#include + +#include + +namespace ml { +namespace model { + +//! \brief Determines the memory used by the current process. +//! +//! DESCRIPTION:\n +//! Determines the memory used by the current process based on the operating system. +class MODEL_EXPORT CSystemMemoryUsage { +public: + CSystemMemoryUsage() = default; + ~CSystemMemoryUsage() = default; + + CSystemMemoryUsage(const CSystemMemoryUsage&) = delete; + CSystemMemoryUsage(CSystemMemoryUsage&&) = delete; + CSystemMemoryUsage& operator=(const CSystemMemoryUsage&) = delete; + CSystemMemoryUsage& operator=(CSystemMemoryUsage&&) = delete; + + //! Return the system memory used by the current process. + //! \param memSize An estimate of the process memory + //! \return The system memory usage based on the current OS. 
+ std::size_t operator()(std::size_t memSize); +}; +} +} + +#endif //INCLUDED_ml_model_CSystemMemoryUsage_h diff --git a/lib/api/CAnomalyJob.cc b/lib/api/CAnomalyJob.cc index 3a8c06be2a..a334fe34dc 100644 --- a/lib/api/CAnomalyJob.cc +++ b/lib/api/CAnomalyJob.cc @@ -205,6 +205,10 @@ bool CAnomalyJob::handleRecord(const TStrStrUMap& dataRowFields, TOptionalTime t } ++core::CProgramCounters::counter(counter_t::E_TSADNumberApiRecordsHandled); + core::CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = + model::CResourceMonitor::systemMemory(); + core::CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = + model::CResourceMonitor::maxSystemMemory(); ++m_NumRecordsHandled; m_LatestRecordTime = std::max(m_LatestRecordTime, *time); diff --git a/lib/api/CModelSizeStatsJsonWriter.cc b/lib/api/CModelSizeStatsJsonWriter.cc index 43fef49602..68b5b80c88 100644 --- a/lib/api/CModelSizeStatsJsonWriter.cc +++ b/lib/api/CModelSizeStatsJsonWriter.cc @@ -25,6 +25,8 @@ const std::string JOB_ID{"job_id"}; const std::string MODEL_SIZE_STATS{"model_size_stats"}; const std::string MODEL_BYTES{"model_bytes"}; const std::string PEAK_MODEL_BYTES{"peak_model_bytes"}; +const std::string SYSTEM_MEMORY_BYTES{"system_memory_bytes"}; +const std::string MAX_SYSTEM_MEMORY_BYTES{"max_system_memory_bytes"}; const std::string MODEL_BYTES_EXCEEDED{"model_bytes_exceeded"}; const std::string MODEL_BYTES_MEMORY_LIMIT{"model_bytes_memory_limit"}; const std::string TOTAL_BY_FIELD_COUNT{"total_by_field_count"}; diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index b003e90a53..20e422134f 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -9,6 +9,7 @@ * limitation. 
*/ #include +#include #include #include @@ -92,6 +93,8 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { std::size_t nonLimitedUsage{0}; std::size_t limitedUsage{0}; + std::size_t nonLimitedMaxSystemUsage{0}; + std::size_t limitedMaxSystemUsage{0}; { // Without limits, this data set should make the models around // 1230000 bytes @@ -105,8 +108,6 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); model::CLimits limits; - //limits.resourceMonitor().m_ByteLimitHigh = 100000; - //limits.resourceMonitor().m_ByteLimitLow = 90000; { LOG_TRACE(<< "Setting up job"); @@ -127,8 +128,12 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { BOOST_REQUIRE_EQUAL(uint64_t(18630), job.numRecordsHandled()); nonLimitedUsage = limits.resourceMonitor().totalMemory(); + nonLimitedMaxSystemUsage = model::CResourceMonitor::maxSystemMemory(); } } + LOG_DEBUG(<< "nonLimitedUsage: " << nonLimitedUsage); + LOG_DEBUG(<< "nonLimitedMaxSystemUsage: " << nonLimitedMaxSystemUsage); + BOOST_TEST_REQUIRE(nonLimitedMaxSystemUsage >= nonLimitedUsage); { // Now run the data with limiting ml::api::CAnomalyJobConfig jobConfig = CTestAnomalyJob::makeSimpleJobConfig( @@ -166,11 +171,15 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { // TODO this limit must be tightened once there is more granular // control over the model memory creation limitedUsage = limits.resourceMonitor().totalMemory(); + limitedMaxSystemUsage = model::CResourceMonitor::maxSystemMemory(); } LOG_TRACE(<< outputStrm.str()); LOG_DEBUG(<< "Non-limited usage: " << nonLimitedUsage << "; limited: " << limitedUsage); + LOG_DEBUG(<< "Non-limited System Usage: " << nonLimitedMaxSystemUsage); + LOG_DEBUG(<< "Limited System Usage: " << limitedMaxSystemUsage); BOOST_TEST_REQUIRE(limitedUsage < nonLimitedUsage); + BOOST_TEST_REQUIRE(limitedMaxSystemUsage >= limitedUsage); } } @@ -375,6 +384,8 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); 
LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage bytes = " << used.s_Usage); + LOG_DEBUG(<< "System memory usage bytes = " << used.s_SystemMemoryUsage); + LOG_DEBUG(<< "Max system memory usage bytes = " << used.s_MaxSystemMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * core::constants::BYTES_IN_MEGABYTES); BOOST_TEST_REQUIRE(used.s_ByFields > testParam.s_ExpectedByFields); @@ -384,6 +395,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedByMemoryUsageRelativeErrorDivisor); + BOOST_TEST_REQUIRE(used.s_Usage <= used.s_MaxSystemMemoryUsage); } LOG_DEBUG(<< "**** Test partition with bucketLength = " << testParam.s_BucketLength @@ -428,6 +440,8 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); + LOG_DEBUG(<< "System memory usage = " << used.s_SystemMemoryUsage); + LOG_DEBUG(<< "Max system memory usage = " << used.s_MaxSystemMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * 1024 * 1024); BOOST_TEST_REQUIRE(used.s_PartitionFields >= testParam.s_ExpectedPartitionFields); BOOST_TEST_REQUIRE(used.s_PartitionFields < 450); @@ -437,6 +451,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedPartitionUsageRelativeErrorDivisor); + BOOST_TEST_REQUIRE(used.s_Usage <= used.s_MaxSystemMemoryUsage); } LOG_DEBUG(<< "**** Test over with bucketLength = " << testParam.s_BucketLength @@ -479,6 +494,8 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# over = " << used.s_OverFields); 
LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); + LOG_DEBUG(<< "System memory usage = " << used.s_SystemMemoryUsage); + LOG_DEBUG(<< "Max system memory usage = " << used.s_MaxSystemMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * 1024 * 1024); BOOST_TEST_REQUIRE(used.s_OverFields > testParam.s_ExpectedOverFields); BOOST_TEST_REQUIRE(used.s_OverFields <= 9000); @@ -486,6 +503,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedOverUsageRelativeErrorDivisor); + BOOST_TEST_REQUIRE(used.s_Usage <= used.s_MaxSystemMemoryUsage); } } } diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index ba44163e7c..19caa4dfc1 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1728,21 +1728,23 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { resourceUsage.s_AdjustedUsage = 2; resourceUsage.s_PeakUsage = 3; resourceUsage.s_AdjustedPeakUsage = 4; - resourceUsage.s_ByFields = 5; - resourceUsage.s_PartitionFields = 6; - resourceUsage.s_OverFields = 7; - resourceUsage.s_AllocationFailures = 8; + resourceUsage.s_SystemMemoryUsage = 5; + resourceUsage.s_MaxSystemMemoryUsage = 6; + resourceUsage.s_ByFields = 7; + resourceUsage.s_PartitionFields = 8; + resourceUsage.s_OverFields = 9; + resourceUsage.s_AllocationFailures = 10; resourceUsage.s_MemoryStatus = ml::model_t::E_MemoryStatusHardLimit; - resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisCurrentModelBytes; - resourceUsage.s_BucketStartTime = 9; - resourceUsage.s_BytesExceeded = 10; - resourceUsage.s_BytesMemoryLimit = 11; - resourceUsage.s_OverallCategorizerStats.s_CategorizedMessages = 12; - resourceUsage.s_OverallCategorizerStats.s_TotalCategories = 13; - 
resourceUsage.s_OverallCategorizerStats.s_FrequentCategories = 14; - resourceUsage.s_OverallCategorizerStats.s_RareCategories = 15; - resourceUsage.s_OverallCategorizerStats.s_DeadCategories = 16; - resourceUsage.s_OverallCategorizerStats.s_MemoryCategorizationFailures = 17; + resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisPeakModelBytes; + resourceUsage.s_BucketStartTime = 11; + resourceUsage.s_BytesExceeded = 12; + resourceUsage.s_BytesMemoryLimit = 13; + resourceUsage.s_OverallCategorizerStats.s_CategorizedMessages = 14; + resourceUsage.s_OverallCategorizerStats.s_TotalCategories = 15; + resourceUsage.s_OverallCategorizerStats.s_FrequentCategories = 16; + resourceUsage.s_OverallCategorizerStats.s_RareCategories = 17; + resourceUsage.s_OverallCategorizerStats.s_DeadCategories = 18; + resourceUsage.s_OverallCategorizerStats.s_MemoryCategorizationFailures = 19; resourceUsage.s_OverallCategorizerStats.s_CategorizationStatus = ml::model_t::E_CategorizationStatusWarn; @@ -1771,43 +1773,43 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { BOOST_TEST_REQUIRE(sizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL(4, sizeStats.at("peak_model_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_by_field_count")); - BOOST_REQUIRE_EQUAL(5, sizeStats.at("total_by_field_count").to_number()); + BOOST_REQUIRE_EQUAL(7, sizeStats.at("total_by_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_partition_field_count")); BOOST_REQUIRE_EQUAL( - 6, sizeStats.at("total_partition_field_count").to_number()); + 8, sizeStats.at("total_partition_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_over_field_count")); - BOOST_REQUIRE_EQUAL(7, sizeStats.at("total_over_field_count").to_number()); + BOOST_REQUIRE_EQUAL(9, sizeStats.at("total_over_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("bucket_allocation_failures_count")); BOOST_REQUIRE_EQUAL( - 8, 
sizeStats.at("bucket_allocation_failures_count").to_number()); + 10, sizeStats.at("bucket_allocation_failures_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("timestamp")); - BOOST_REQUIRE_EQUAL(9000, sizeStats.at("timestamp").to_number()); + BOOST_REQUIRE_EQUAL(11000, sizeStats.at("timestamp").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("memory_status")); BOOST_REQUIRE_EQUAL("hard_limit", sizeStats.at("memory_status").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("assignment_memory_basis")); - BOOST_REQUIRE_EQUAL("current_model_bytes", + BOOST_REQUIRE_EQUAL("peak_model_bytes", sizeStats.at("assignment_memory_basis").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("log_time")); std::int64_t nowMs{ml::core::CTimeUtils::nowMs()}; BOOST_TEST_REQUIRE(nowMs >= sizeStats.at("log_time").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("model_bytes_exceeded")); - BOOST_REQUIRE_EQUAL(10, sizeStats.at("model_bytes_exceeded").to_number()); + BOOST_REQUIRE_EQUAL(12, sizeStats.at("model_bytes_exceeded").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("model_bytes_memory_limit")); BOOST_REQUIRE_EQUAL( - 11, sizeStats.at("model_bytes_memory_limit").to_number()); + 13, sizeStats.at("model_bytes_memory_limit").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("categorized_doc_count")); - BOOST_REQUIRE_EQUAL(12, sizeStats.at("categorized_doc_count").to_number()); + BOOST_REQUIRE_EQUAL(14, sizeStats.at("categorized_doc_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_category_count")); - BOOST_REQUIRE_EQUAL(13, sizeStats.at("total_category_count").to_number()); + BOOST_REQUIRE_EQUAL(15, sizeStats.at("total_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("frequent_category_count")); BOOST_REQUIRE_EQUAL( - 14, sizeStats.at("frequent_category_count").to_number()); + 16, sizeStats.at("frequent_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("rare_category_count")); - BOOST_REQUIRE_EQUAL(15, 
sizeStats.at("rare_category_count").to_number()); + BOOST_REQUIRE_EQUAL(17, sizeStats.at("rare_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("dead_category_count")); - BOOST_REQUIRE_EQUAL(16, sizeStats.at("dead_category_count").to_number()); + BOOST_REQUIRE_EQUAL(18, sizeStats.at("dead_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("failed_category_count")); - BOOST_REQUIRE_EQUAL(17, sizeStats.at("failed_category_count").to_number()); + BOOST_REQUIRE_EQUAL(19, sizeStats.at("failed_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("categorization_status")); BOOST_REQUIRE_EQUAL("warn", sizeStats.at("categorization_status").as_string()); } diff --git a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc index ff404b7f91..f2c22a41de 100644 --- a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc +++ b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc @@ -36,6 +36,8 @@ BOOST_AUTO_TEST_CASE(testWrite) { 20000, // bytes used (adjusted) 30000, // peak bytes used 60000, // peak bytes used (adjusted) + 409600, // System memory used (rss) + 413696, // Max system memory used (max rss) 3, // # by fields 1, // # partition fields 150, // # over fields diff --git a/lib/core/CProcessStats_Linux.cc b/lib/core/CProcessStats_Linux.cc index e5ab8cdfd0..e3635e279f 100644 --- a/lib/core/CProcessStats_Linux.cc +++ b/lib/core/CProcessStats_Linux.cc @@ -11,6 +11,7 @@ #include #include +#include #include #include diff --git a/lib/core/CProcessStats_MacOSX.cc b/lib/core/CProcessStats_MacOSX.cc index 9aa1e969c9..61dc9fca66 100644 --- a/lib/core/CProcessStats_MacOSX.cc +++ b/lib/core/CProcessStats_MacOSX.cc @@ -8,9 +8,11 @@ * compliance with the Elastic License 2.0 and the foregoing additional * limitation. 
*/ -#include #include +#include +#include + #include #include #include diff --git a/lib/core/CProcessStats_Windows.cc b/lib/core/CProcessStats_Windows.cc index 7ca2d7e6c0..933952e1f2 100644 --- a/lib/core/CProcessStats_Windows.cc +++ b/lib/core/CProcessStats_Windows.cc @@ -8,8 +8,10 @@ * compliance with the Elastic License 2.0 and the foregoing additional * limitation. */ -#include #include + +#include +#include #include #include @@ -36,6 +38,7 @@ std::size_t CProcessStats::maxResidentSetSize() { LOG_DEBUG(<< "Failed to retrieve memory info " << CWindowsError()); return 0; } + return static_cast(stats.PeakWorkingSetSize); } } diff --git a/lib/model/CMakeLists.txt b/lib/model/CMakeLists.txt index c53eec9fb0..37c8c1cc5c 100644 --- a/lib/model/CMakeLists.txt +++ b/lib/model/CMakeLists.txt @@ -75,6 +75,7 @@ ml_add_library(MlModel SHARED CSampleCounts.cc CSearchKey.cc CSimpleCountDetector.cc + CSystemMemoryUsage.cc CTokenListCategory.cc CTokenListDataCategorizerBase.cc CTokenListReverseSearchCreator.cc diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index d93b3b8bd8..1e131ce0b2 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -20,6 +21,7 @@ #include #include +#include #include #include @@ -377,11 +379,14 @@ void CResourceMonitor::sendMemoryUsageReport(core_t::TTime bucketStartTime, CResourceMonitor::SModelSizeStats CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { SModelSizeStats res; + CSystemMemoryUsage systemMemoryUsage; res.s_Usage = this->totalMemory(); - res.s_AdjustedUsage = this->adjustedUsage(res.s_Usage); + res.s_AdjustedUsage = systemMemoryUsage(this->adjustedUsage(res.s_Usage)); res.s_PeakUsage = static_cast( core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage)); - res.s_AdjustedPeakUsage = this->adjustedUsage(res.s_PeakUsage); + res.s_AdjustedPeakUsage = 
systemMemoryUsage(this->adjustedUsage(res.s_PeakUsage)); + res.s_SystemMemoryUsage = core::CProcessStats::residentSetSize(); + res.s_MaxSystemMemoryUsage = core::CProcessStats::maxResidentSetSize(); res.s_BytesMemoryLimit = this->persistenceMemoryIncreaseFactor() * m_ByteLimitHigh; res.s_BytesExceeded = m_CurrentBytesExceeded; res.s_MemoryStatus = m_MemoryStatus; @@ -491,5 +496,12 @@ std::size_t CResourceMonitor::totalMemory() const { counter_t::E_TSADOutputMemoryAllocatorUsage)); } +std::size_t CResourceMonitor::systemMemory() { + return core::CProcessStats::residentSetSize(); +} + +std::size_t CResourceMonitor::maxSystemMemory() { + return core::CProcessStats::maxResidentSetSize(); +} } // model } // ml diff --git a/lib/model/CSystemMemoryUsage.cc b/lib/model/CSystemMemoryUsage.cc new file mode 100644 index 0000000000..8fd926f65b --- /dev/null +++ b/lib/model/CSystemMemoryUsage.cc @@ -0,0 +1,23 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ + +#include + +#include + +namespace ml { +namespace model { +// On platforms other than Linux the system memory usage is that provided - the estimated size of the models. 
+std::size_t CSystemMemoryUsage::operator()(std::size_t memSize) { + return memSize; +} +} +} diff --git a/lib/model/CSystemMemoryUsage_Linux.cc b/lib/model/CSystemMemoryUsage_Linux.cc new file mode 100644 index 0000000000..f3b21522d5 --- /dev/null +++ b/lib/model/CSystemMemoryUsage_Linux.cc @@ -0,0 +1,24 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ + +#include + +#include + +namespace ml { +namespace model { +// On Linux the value returned is the maximum resident set size of the process, +// as reported by core::CProcessStats. The estimated value provided is ignored. +std::size_t CSystemMemoryUsage::operator()(std::size_t /*memSize*/) { + return core::CProcessStats::maxResidentSetSize(); +} +} +} diff --git a/lib/model/unittest/CResourceMonitorTest.cc b/lib/model/unittest/CResourceMonitorTest.cc index f69dccc384..c17c644c80 100644 --- a/lib/model/unittest/CResourceMonitorTest.cc +++ b/lib/model/unittest/CResourceMonitorTest.cc @@ -536,7 +536,7 @@ BOOST_FIXTURE_TEST_CASE(testExtraMemory, CTestFixture) { } BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { - // Clear the counter so that other test cases do not interfere. + // Clear the counters so that other test cases do not interfere. 
core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage) = 0; CLimits limits; @@ -549,6 +549,10 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory, m_ReportedModelSizeStats.s_PeakUsage); + BOOST_TEST_REQUIRE(baseTotalMemory <= m_ReportedModelSizeStats.s_SystemMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= + m_ReportedModelSizeStats.s_SystemMemoryUsage); + monitor.addExtraMemory(100); monitor.updateMoments(monitor.totalMemory(), 0, 1); @@ -556,6 +560,10 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); + BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= + m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); + monitor.addExtraMemory(-50); monitor.updateMoments(monitor.totalMemory(), 0, 1); @@ -563,12 +571,20 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 50, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); + BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= + m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); + monitor.addExtraMemory(100); monitor.updateMoments(monitor.totalMemory(), 0, 1); monitor.sendMemoryUsageReport(0, 1); BOOST_REQUIRE_EQUAL(baseTotalMemory + 150, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 150, m_ReportedModelSizeStats.s_PeakUsage); + + BOOST_TEST_REQUIRE(baseTotalMemory + 150 <= m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage 
<= + m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); } BOOST_FIXTURE_TEST_CASE(testUpdateMoments, CTestFixture) {