Skip to content

Commit a257413

Browse files
authored
Aggregate nodes memory stats for tenant (#11612)
1 parent 564ee81 commit a257413

File tree

5 files changed

+305
-0
lines changed

5 files changed

+305
-0
lines changed

ydb/core/base/memory_stats.h

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
#pragma once
2+
3+
#include "defs.h"
4+
#include <ydb/core/protos/memory_stats.pb.h>
5+
6+
namespace NKikimr::NMemory {
7+
8+
namespace {
9+
10+
#define ADD_MEMORY(name) \
11+
inline void Add##name##To(NKikimrMemory::TMemoryStats& left, const NKikimrMemory::TMemoryStats& right) { \
12+
left.Set##name(left.Get##name() + right.Get##name()); \
13+
}
14+
15+
ADD_MEMORY(ExternalConsumption)
16+
ADD_MEMORY(AnonRss)
17+
18+
ADD_MEMORY(MemTotal)
19+
ADD_MEMORY(MemAvailable)
20+
ADD_MEMORY(HardLimit)
21+
ADD_MEMORY(SoftLimit)
22+
ADD_MEMORY(TargetUtilization)
23+
24+
}
25+
26+
class TMemoryStatsAggregator {
27+
using TMemoryStats = NKikimrMemory::TMemoryStats;
28+
29+
TMemoryStats Aggregated;
30+
TMap<TString, TMemoryStats> PerHost;
31+
32+
public:
33+
void Add(const TMemoryStats& memoryStats, TString host) {
34+
AddMemoryStats(Aggregated, memoryStats);
35+
36+
if (!memoryStats.HasExternalConsumption()) {
37+
return;
38+
}
39+
40+
// a special case when there are multiple YDB nodes share one host memory should be handled as follows:
41+
42+
auto& hostMemoryStats = PerHost[host];
43+
// subtract all ydb consumption seen before
44+
hostMemoryStats.SetExternalConsumption(memoryStats.GetExternalConsumption() - hostMemoryStats.GetAnonRss());
45+
AddAnonRssTo(hostMemoryStats, memoryStats);
46+
47+
hostMemoryStats.SetMemTotal(memoryStats.GetMemTotal());
48+
hostMemoryStats.SetMemAvailable(memoryStats.GetMemAvailable());
49+
hostMemoryStats.SetHardLimit(memoryStats.GetHardLimit());
50+
hostMemoryStats.SetSoftLimit(memoryStats.GetSoftLimit());
51+
hostMemoryStats.SetTargetUtilization(memoryStats.GetTargetUtilization());
52+
}
53+
54+
TMemoryStats Aggregate() {
55+
if (!PerHost) {
56+
return Aggregated;
57+
}
58+
59+
// a special case when there are multiple YDB nodes share one host memory should be handled as follows:
60+
61+
Aggregated.SetExternalConsumption(0);
62+
Aggregated.SetMemTotal(0);
63+
Aggregated.SetMemAvailable(0);
64+
Aggregated.SetHardLimit(0);
65+
Aggregated.SetSoftLimit(0);
66+
Aggregated.SetTargetUtilization(0);
67+
for (const auto& host_ : PerHost) {
68+
auto& host = host_.second;
69+
AddExternalConsumptionTo(Aggregated, host);
70+
AddMemTotalTo(Aggregated, host);
71+
AddMemAvailableTo(Aggregated, host);
72+
AddHardLimitTo(Aggregated, host);
73+
AddSoftLimitTo(Aggregated, host);
74+
AddTargetUtilizationTo(Aggregated, host);
75+
}
76+
77+
return Aggregated;
78+
}
79+
80+
private:
81+
static void AddMemoryStats(TMemoryStats& left, const TMemoryStats& right) {
82+
using namespace ::google::protobuf;
83+
const Descriptor& descriptor = *TMemoryStats::GetDescriptor();
84+
const Reflection& reflection = *TMemoryStats::GetReflection();
85+
int fieldCount = descriptor.field_count();
86+
for (int index = 0; index < fieldCount; ++index) {
87+
const FieldDescriptor* field = descriptor.field(index);
88+
if (reflection.HasField(right, field)) {
89+
FieldDescriptor::CppType type = field->cpp_type();
90+
switch (type) {
91+
case FieldDescriptor::CPPTYPE_UINT64:
92+
reflection.SetUInt64(&left, field,
93+
reflection.GetUInt64(left, field) + reflection.GetUInt64(right, field));
94+
break;
95+
default:
96+
Y_DEBUG_ABORT_UNLESS("Unhandled field type");
97+
}
98+
}
99+
}
100+
}
101+
};
102+
103+
}

ydb/core/base/ut/memory_stats_ut.cpp

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
#include "memory_stats.h"
2+
3+
#include <library/cpp/testing/unittest/registar.h>
4+
5+
namespace NKikimr::NMemory {
6+
7+
using TMemoryStats = NKikimrMemory::TMemoryStats;
8+
9+
Y_UNIT_TEST_SUITE (TMemoryStatsAggregator) {
10+
11+
TMemoryStats GenerateStats(ui64 index) {
12+
TMemoryStats stats;
13+
stats.SetAnonRss(10 + index);
14+
stats.SetCGroupLimit(20 + index);
15+
stats.SetMemTotal(30 + index);
16+
stats.SetMemAvailable(40 + index);
17+
stats.SetAllocatedMemory(50 + index);
18+
stats.SetAllocatorCachesMemory(60 + index);
19+
stats.SetHardLimit(70 + index);
20+
stats.SetSoftLimit(80 + index);
21+
stats.SetTargetUtilization(90 + index);
22+
stats.SetExternalConsumption(100 + index);
23+
stats.SetSharedCacheConsumption(110 + index);
24+
stats.SetSharedCacheLimit(120 + index);
25+
stats.SetMemTableConsumption(130 + index);
26+
stats.SetMemTableLimit(140 + index);
27+
stats.SetQueryExecutionConsumption(150 + index);
28+
stats.SetQueryExecutionLimit(160 + index);
29+
return stats;
30+
}
31+
32+
Y_UNIT_TEST (Aggregate_Empty) {
33+
TMemoryStatsAggregator aggregator;
34+
35+
TMemoryStats aggregated = aggregator.Aggregate();
36+
37+
Cerr << aggregated.ShortDebugString() << Endl;
38+
39+
UNIT_ASSERT_VALUES_EQUAL(aggregated.ShortDebugString(), "");
40+
}
41+
42+
Y_UNIT_TEST (Aggregate_Single) {
43+
TMemoryStatsAggregator aggregator;
44+
45+
TMemoryStats stats1 = GenerateStats(1);
46+
47+
Cerr << stats1.ShortDebugString() << Endl;
48+
49+
aggregator.Add(stats1, "host");
50+
51+
TMemoryStats aggregated = aggregator.Aggregate();
52+
53+
Cerr << aggregated.ShortDebugString() << Endl;
54+
55+
UNIT_ASSERT_VALUES_EQUAL(aggregated.ShortDebugString(), stats1.ShortDebugString());
56+
}
57+
58+
Y_UNIT_TEST (Aggregate_Summarize_ExternalConsumption_DifferentHosts) {
59+
TMemoryStatsAggregator aggregator;
60+
61+
TMemoryStats stats1 = GenerateStats(1);
62+
TMemoryStats stats2 = GenerateStats(2);
63+
TMemoryStats stats3 = GenerateStats(3);
64+
65+
Cerr << stats1.ShortDebugString() << Endl;
66+
Cerr << stats2.ShortDebugString() << Endl;
67+
Cerr << stats3.ShortDebugString() << Endl;
68+
69+
aggregator.Add(stats1, "host1");
70+
aggregator.Add(stats2, "host2");
71+
aggregator.Add(stats3, "host3");
72+
73+
TMemoryStats aggregated = aggregator.Aggregate();
74+
75+
Cerr << aggregated.ShortDebugString() << Endl;
76+
77+
UNIT_ASSERT_VALUES_EQUAL(aggregated.ShortDebugString(),
78+
"AnonRss: 36 CGroupLimit: 66 MemTotal: 96 MemAvailable: 126 AllocatedMemory: 156 AllocatorCachesMemory: 186 HardLimit: 216 SoftLimit: 246 TargetUtilization: 276 ExternalConsumption: 306 SharedCacheConsumption: 336 SharedCacheLimit: 366 MemTableConsumption: 396 MemTableLimit: 426 QueryExecutionConsumption: 456 QueryExecutionLimit: 486");
79+
}
80+
81+
Y_UNIT_TEST (Aggregate_Summarize_NoExternalConsumption_DifferentHosts) {
82+
TMemoryStatsAggregator aggregator;
83+
84+
TMemoryStats stats1 = GenerateStats(1);
85+
TMemoryStats stats2 = GenerateStats(2);
86+
TMemoryStats stats3 = GenerateStats(3);
87+
88+
stats1.ClearExternalConsumption();
89+
stats2.ClearExternalConsumption();
90+
stats3.ClearExternalConsumption();
91+
92+
Cerr << stats1.ShortDebugString() << Endl;
93+
Cerr << stats2.ShortDebugString() << Endl;
94+
Cerr << stats3.ShortDebugString() << Endl;
95+
96+
aggregator.Add(stats1, "host1");
97+
aggregator.Add(stats2, "host2");
98+
aggregator.Add(stats3, "host3");
99+
100+
TMemoryStats aggregated = aggregator.Aggregate();
101+
102+
Cerr << aggregated.ShortDebugString() << Endl;
103+
104+
UNIT_ASSERT_VALUES_EQUAL(aggregated.ShortDebugString(),
105+
"AnonRss: 36 CGroupLimit: 66 MemTotal: 96 MemAvailable: 126 AllocatedMemory: 156 AllocatorCachesMemory: 186 HardLimit: 216 SoftLimit: 246 TargetUtilization: 276 SharedCacheConsumption: 336 SharedCacheLimit: 366 MemTableConsumption: 396 MemTableLimit: 426 QueryExecutionConsumption: 456 QueryExecutionLimit: 486");
106+
}
107+
108+
Y_UNIT_TEST (Aggregate_Summarize_ExternalConsumption_OneHost) {
109+
TMemoryStatsAggregator aggregator;
110+
111+
TMemoryStats stats1 = GenerateStats(1);
112+
TMemoryStats stats2 = GenerateStats(2);
113+
TMemoryStats stats3 = GenerateStats(3);
114+
115+
Cerr << stats1.ShortDebugString() << Endl;
116+
Cerr << stats2.ShortDebugString() << Endl;
117+
Cerr << stats3.ShortDebugString() << Endl;
118+
119+
aggregator.Add(stats1, "host");
120+
aggregator.Add(stats2, "host");
121+
aggregator.Add(stats3, "host");
122+
123+
TMemoryStats aggregated = aggregator.Aggregate();
124+
125+
Cerr << aggregated.ShortDebugString() << Endl;
126+
127+
UNIT_ASSERT_VALUES_EQUAL(aggregated.ShortDebugString(),
128+
"AnonRss: 36 CGroupLimit: 66 MemTotal: 33 MemAvailable: 43 AllocatedMemory: 156 AllocatorCachesMemory: 186 HardLimit: 73 SoftLimit: 83 TargetUtilization: 93 ExternalConsumption: 80 SharedCacheConsumption: 336 SharedCacheLimit: 366 MemTableConsumption: 396 MemTableLimit: 426 QueryExecutionConsumption: 456 QueryExecutionLimit: 486");
129+
}
130+
131+
Y_UNIT_TEST (Aggregate_Summarize_NoExternalConsumption_OneHost) {
132+
TMemoryStatsAggregator aggregator;
133+
134+
TMemoryStats stats1 = GenerateStats(1);
135+
TMemoryStats stats2 = GenerateStats(2);
136+
TMemoryStats stats3 = GenerateStats(3);
137+
138+
Cerr << stats1.ShortDebugString() << Endl;
139+
Cerr << stats2.ShortDebugString() << Endl;
140+
Cerr << stats3.ShortDebugString() << Endl;
141+
142+
stats1.ClearExternalConsumption();
143+
stats2.ClearExternalConsumption();
144+
stats3.ClearExternalConsumption();
145+
146+
aggregator.Add(stats1, "host");
147+
aggregator.Add(stats2, "host");
148+
aggregator.Add(stats3, "host");
149+
150+
TMemoryStats aggregated = aggregator.Aggregate();
151+
152+
Cerr << aggregated.ShortDebugString() << Endl;
153+
154+
UNIT_ASSERT_VALUES_EQUAL(aggregated.ShortDebugString(),
155+
"AnonRss: 36 CGroupLimit: 66 MemTotal: 96 MemAvailable: 126 AllocatedMemory: 156 AllocatorCachesMemory: 186 HardLimit: 216 SoftLimit: 246 TargetUtilization: 276 SharedCacheConsumption: 336 SharedCacheLimit: 366 MemTableConsumption: 396 MemTableLimit: 426 QueryExecutionConsumption: 456 QueryExecutionLimit: 486");
156+
}
157+
158+
Y_UNIT_TEST (Aggregate_ExternalConsumption_CollidingHosts) {
159+
TMemoryStatsAggregator aggregator;
160+
161+
TMemoryStats stats1 = GenerateStats(1);
162+
TMemoryStats stats2 = GenerateStats(2);
163+
TMemoryStats stats3 = GenerateStats(3);
164+
165+
Cerr << stats1.ShortDebugString() << Endl;
166+
Cerr << stats2.ShortDebugString() << Endl;
167+
Cerr << stats3.ShortDebugString() << Endl;
168+
169+
aggregator.Add(stats1, "host1");
170+
aggregator.Add(stats2, "host1");
171+
aggregator.Add(stats3, "host2");
172+
173+
TMemoryStats aggregated = aggregator.Aggregate();
174+
175+
Cerr << aggregated.ShortDebugString() << Endl;
176+
177+
UNIT_ASSERT_VALUES_EQUAL(aggregated.ShortDebugString(),
178+
"AnonRss: 36 CGroupLimit: 66 MemTotal: 65 MemAvailable: 85 AllocatedMemory: 156 AllocatorCachesMemory: 186 HardLimit: 145 SoftLimit: 165 TargetUtilization: 185 ExternalConsumption: 194 SharedCacheConsumption: 336 SharedCacheLimit: 366 MemTableConsumption: 396 MemTableLimit: 426 QueryExecutionConsumption: 456 QueryExecutionLimit: 486");
179+
}
180+
}
181+
182+
}

ydb/core/base/ut/ya.make

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ SRCS(
1414
blobstorage_grouptype_ut.cpp
1515
localdb_ut.cpp
1616
logoblob_ut.cpp
17+
memory_stats_ut.cpp
1718
statestorage_ut.cpp
1819
statestorage_guardian_impl_ut.cpp
1920
table_index_ut.cpp

ydb/core/viewer/protos/viewer.proto

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ syntax = "proto3";
22

33
import "ydb/core/graph/protos/graph.proto";
44
import "ydb/core/protos/tablet_counters.proto";
5+
import "ydb/core/protos/memory_stats.proto";
56
import "ydb/core/protos/node_whiteboard.proto";
67
import "ydb/core/protos/flat_scheme_op.proto";
78
import "ydb/core/protos/tablet.proto";
@@ -396,6 +397,7 @@ message TTenant {
396397
repeated TStorageUsage TablesStorage = 44;
397398
repeated TStorageUsage DatabaseStorage = 45;
398399
uint32 CoresTotal = 50;
400+
optional NKikimrMemory.TMemoryStats MemoryStats = 51;
399401
}
400402

401403
message TTenants {

ydb/core/viewer/viewer_tenantinfo.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "viewer_tabletinfo.h"
88
#include "wb_aggregate.h"
99
#include "wb_merge.h"
10+
#include <ydb/core/base/memory_stats.h>
1011

1112
namespace NKikimr::NViewer {
1213

@@ -44,6 +45,7 @@ class TJsonTenantInfo : public TViewerPipeClient {
4445
bool Tablets = false;
4546
bool SystemTablets = false;
4647
bool Storage = false;
48+
bool MemoryStats = false;
4749
bool Nodes = false;
4850
bool Users = false;
4951
bool OffloadMerge = false;
@@ -108,6 +110,7 @@ class TJsonTenantInfo : public TViewerPipeClient {
108110
Tablets = FromStringWithDefault<bool>(params.Get("tablets"), Tablets);
109111
SystemTablets = FromStringWithDefault<bool>(params.Get("system_tablets"), Tablets); // Tablets here is by design
110112
Storage = FromStringWithDefault<bool>(params.Get("storage"), Storage);
113+
MemoryStats = FromStringWithDefault<bool>(params.Get("memory"), MemoryStats);
111114
Nodes = FromStringWithDefault<bool>(params.Get("nodes"), Nodes);
112115
Users = FromStringWithDefault<bool>(params.Get("users"), Users);
113116
User = params.Get("user");
@@ -274,6 +277,9 @@ class TJsonTenantInfo : public TViewerPipeClient {
274277
request->Record.MutableFieldsRequired()->CopyFrom(GetDefaultWhiteboardFields<NKikimrWhiteboard::TSystemStateInfo>());
275278
request->Record.AddFieldsRequired(NKikimrWhiteboard::TSystemStateInfo::kCoresUsedFieldNumber);
276279
request->Record.AddFieldsRequired(NKikimrWhiteboard::TSystemStateInfo::kCoresTotalFieldNumber);
280+
if (MemoryStats) {
281+
request->Record.AddFieldsRequired(NKikimrWhiteboard::TSystemStateInfo::kMemoryStatsFieldNumber);
282+
}
277283
SystemStateResponse.emplace(nodeId, MakeWhiteboardRequest(nodeId, request.release()));
278284
}
279285
}
@@ -781,6 +787,7 @@ class TJsonTenantInfo : public TViewerPipeClient {
781787
}
782788

783789
THashSet<TNodeId> tenantNodes;
790+
NMemory::TMemoryStatsAggregator tenantMemoryStats;
784791

785792
for (TNodeId nodeId : tenant.GetNodeIds()) {
786793
auto itNodeInfo = nodeSystemStateInfo.find(nodeId);
@@ -828,16 +835,21 @@ class TJsonTenantInfo : public TViewerPipeClient {
828835
if (nodeInfo.HasMemoryLimit()) {
829836
tenant.SetMemoryLimit(tenant.GetMemoryLimit() + nodeInfo.GetMemoryLimit());
830837
}
838+
if (nodeInfo.HasMemoryStats()) {
839+
tenantMemoryStats.Add(nodeInfo.GetMemoryStats(), nodeInfo.GetHost());
840+
}
831841
overall = Max(overall, GetViewerFlag(nodeInfo.GetSystemState()));
832842
}
833843
tenantNodes.emplace(nodeId);
834844
}
845+
tenant.MutableMemoryStats()->CopyFrom(tenantMemoryStats.Aggregate());
835846
if (tenant.GetType() == NKikimrViewer::Serverless) {
836847
tenant.SetStorageAllocatedSize(tenant.GetMetrics().GetStorage());
837848
const bool noExclusiveNodes = tenantNodes.empty();
838849
if (noExclusiveNodes) {
839850
tenant.SetMemoryUsed(tenant.GetMetrics().GetMemory());
840851
tenant.ClearMemoryLimit();
852+
tenant.ClearMemoryStats();
841853
tenant.SetCoresUsed(static_cast<double>(tenant.GetMetrics().GetCPU()) / 1000000);
842854
}
843855
}
@@ -961,6 +973,11 @@ class TJsonTenantInfo : public TViewerPipeClient {
961973
.Description = "return storage info",
962974
.Type = "boolean",
963975
});
976+
yaml.AddParameter({
977+
.Name = "memory",
978+
.Description = "return memory info",
979+
.Type = "boolean",
980+
});
964981
yaml.AddParameter({
965982
.Name = "nodes",
966983
.Description = "return nodes info",

0 commit comments

Comments
 (0)