@@ -30,116 +30,89 @@ import (
const (
	unknown   = "unknown"
	labelType = "label"

	// Per-store status names. Each is passed as the status-type label value
	// to clusterStatusGauge together with the engine and the store ID, and is
	// also a key of the map returned by observeStoreStatus.
	clusterStatusStoreUpCount           = "store_up_count"
	clusterStatusStoreDisconnectedCount = "store_disconnected_count"
	clusterStatusStoreSlowCount         = "store_slow_count"
	clusterStatusStoreDownCount         = "store_down_count"
	clusterStatusStoreUnhealthCount     = "store_unhealth_count"
	clusterStatusStoreOfflineCount      = "store_offline_count"
	clusterStatusStoreTombstoneCount    = "store_tombstone_count"
	clusterStatusStoreLowSpaceCount     = "store_low_space_count"
	clusterStatusStorePreparingCount    = "store_preparing_count"
	clusterStatusStoreServingCount      = "store_serving_count"
	clusterStatusStoreRemovingCount     = "store_removing_count"
	clusterStatusStoreRemovedCount      = "store_removed_count"

	// Per-store size and count names, reported through clusterStatusGauge
	// with the same (status type, engine, store ID) label layout.
	clusterStatusRegionCount     = "region_count"
	clusterStatusLeaderCount     = "leader_count"
	clusterStatusWitnessCount    = "witness_count"
	clusterStatusLearnerCount    = "learner_count"
	clusterStatusStorageSize     = "storage_size"
	clusterStatusStorageCapacity = "storage_capacity"
)
7054
7155type storeStatistics struct {
72- opt config.ConfProvider
73- StorageSize uint64
74- StorageCapacity uint64
75- RegionCount int
76- LeaderCount int
77- LearnerCount int
78- WitnessCount int
79- LabelCounter map [string ][]uint64
80-
81- engineStatistics map [string ]* storeStatusStatistics
56+ opt config.ConfProvider
57+ LabelCounter map [string ][]uint64
8258}
8359
84- type storeStatusStatistics struct {
85- opt config.ConfProvider
86- Up int
87- Disconnect int
88- Unhealthy int
89- Down int
90- Offline int
91- Tombstone int
92- LowSpace int
93- Slow int
94- Preparing int
95- Serving int
96- Removing int
97- Removed int
60+ func newStoreStatistics (opt config.ConfProvider ) * storeStatistics {
61+ return & storeStatistics {
62+ opt : opt ,
63+ LabelCounter : make (map [string ][]uint64 ),
64+ }
9865}
9966
100- func (s * storeStatusStatistics ) observe (store * core.StoreInfo ) {
67+ func (s * storeStatistics ) observeStoreStatus (store * core.StoreInfo ) map [string ]float64 {
68+ result := map [string ]float64 {
69+ clusterStatusStoreUpCount : 0 ,
70+ clusterStatusStoreDisconnectedCount : 0 ,
71+ clusterStatusStoreSlowCount : 0 ,
72+ clusterStatusStoreDownCount : 0 ,
73+ clusterStatusStoreUnhealthCount : 0 ,
74+ clusterStatusStoreOfflineCount : 0 ,
75+ clusterStatusStoreTombstoneCount : 0 ,
76+ clusterStatusStoreLowSpaceCount : 0 ,
77+ clusterStatusStorePreparingCount : 0 ,
78+ clusterStatusStoreServingCount : 0 ,
79+ clusterStatusStoreRemovingCount : 0 ,
80+ clusterStatusStoreRemovedCount : 0 ,
81+ }
82+
10183 // Store state.
10284 isDown := false
10385 switch store .GetNodeState () {
10486 case metapb .NodeState_Preparing , metapb .NodeState_Serving :
10587 if store .DownTime () >= s .opt .GetMaxStoreDownTime () {
10688 isDown = true
107- s . Down ++
89+ result [ clusterStatusStoreDownCount ] ++
10890 } else if store .IsUnhealthy () {
109- s . Unhealthy ++
91+ result [ clusterStatusStoreUnhealthCount ] ++
11092 } else if store .IsDisconnected () {
111- s . Disconnect ++
93+ result [ clusterStatusStoreDisconnectedCount ] ++
11294 } else if store .IsSlow () {
113- s . Slow ++
95+ result [ clusterStatusStoreSlowCount ] ++
11496 } else {
115- s . Up ++
97+ result [ clusterStatusStoreUpCount ] ++
11698 }
11799 if store .IsPreparing () {
118- s . Preparing ++
100+ result [ clusterStatusStorePreparingCount ] ++
119101 } else {
120- s . Serving ++
102+ result [ clusterStatusStoreServingCount ] ++
121103 }
122104 case metapb .NodeState_Removing :
123- s . Offline ++
124- s . Removing ++
105+ result [ clusterStatusStoreOfflineCount ] ++
106+ result [ clusterStatusStoreRemovingCount ] ++
125107 case metapb .NodeState_Removed :
126- s . Tombstone ++
127- s . Removed ++
128- return
108+ result [ clusterStatusStoreTombstoneCount ] ++
109+ result [ clusterStatusStoreRemovedCount ] ++
110+ return result
129111 }
130112 if ! isDown && store .IsLowSpace (s .opt .GetLowSpaceRatio ()) {
131- s .LowSpace ++
132- }
133- }
134-
135- func newStoreStatistics (opt config.ConfProvider ) * storeStatistics {
136- statistics := make (map [string ]* storeStatusStatistics , 1 )
137- statistics [core .EngineTiKV ] = & storeStatusStatistics {opt : opt }
138- return & storeStatistics {
139- opt : opt ,
140- LabelCounter : make (map [string ][]uint64 ),
141- engineStatistics : statistics ,
113+ result [clusterStatusStoreLowSpaceCount ]++
142114 }
115+ return result
143116}
144117
145118func (s * storeStatistics ) observe (store * core.StoreInfo ) {
@@ -156,31 +129,28 @@ func (s *storeStatistics) observe(store *core.StoreInfo) {
156129 }
157130 storeAddress := store .GetAddress ()
158131 id := strconv .FormatUint (store .GetID (), 10 )
159- // Store state.
160- var statistics * storeStatusStatistics
161- if ! store .IsTiKV () {
162- statistics = s .engineStatistics [core .EngineTiFlash ]
163- if statistics == nil {
164- s .engineStatistics [core .EngineTiFlash ] = & storeStatusStatistics {opt : s .opt }
165- statistics = s .engineStatistics [core .EngineTiFlash ]
166- }
132+ var engine string
133+ if store .IsTiKV () {
134+ engine = core .EngineTiKV
167135 } else {
168- // tikv statistics has been initialized in newStoreStatistics.
169- statistics = s .engineStatistics [core .EngineTiKV ]
136+ engine = core .EngineTiFlash
137+ }
138+ storeStatusStats := s .observeStoreStatus (store )
139+ for statusType , value := range storeStatusStats {
140+ clusterStatusGauge .WithLabelValues (statusType , engine , id ).Set (value )
170141 }
171- statistics .observe (store )
172142 // skip tombstone store avoid to overwrite metrics
173143 if store .GetNodeState () == metapb .NodeState_Removed {
174144 return
175145 }
176146
177147 // Store stats.
178- s . StorageSize += store .StorageSize ()
179- s . StorageCapacity += store .GetCapacity ()
180- s . RegionCount += store .GetRegionCount ()
181- s . LeaderCount += store .GetLeaderCount ()
182- s . WitnessCount += store .GetWitnessCount ()
183- s . LearnerCount += store .GetLearnerCount ()
148+ clusterStatusGauge . WithLabelValues ( clusterStatusStorageSize , engine , id ). Set ( float64 ( store .StorageSize ()) )
149+ clusterStatusGauge . WithLabelValues ( clusterStatusStorageCapacity , engine , id ). Set ( float64 ( store .GetCapacity ()) )
150+ clusterStatusGauge . WithLabelValues ( clusterStatusRegionCount , engine , id ). Set ( float64 ( store .GetRegionCount ()) )
151+ clusterStatusGauge . WithLabelValues ( clusterStatusLeaderCount , engine , id ). Set ( float64 ( store .GetLeaderCount ()) )
152+ clusterStatusGauge . WithLabelValues ( clusterStatusWitnessCount , engine , id ). Set ( float64 ( store .GetWitnessCount ()) )
153+ clusterStatusGauge . WithLabelValues ( clusterStatusLearnerCount , engine , id ). Set ( float64 ( store .GetLearnerCount ()) )
184154 limit , ok := store .GetStoreLimit ().(* storelimit.SlidingWindows )
185155 if ok {
186156 cap := limit .GetCap ()
@@ -247,46 +217,6 @@ func ObserveHotStat(store *core.StoreInfo, stats *StoresStats) {
247217func (s * storeStatistics ) collect () {
248218 placementStatusGauge .Reset ()
249219
250- // tikv store status metrics.
251- tikvStatistics , ok := s .engineStatistics [core .EngineTiKV ]
252- if ok {
253- tikvUpCounter .Set (float64 (tikvStatistics .Up ))
254- tikvDiconnectedCounter .Set (float64 (tikvStatistics .Disconnect ))
255- tikvDownCounter .Set (float64 (tikvStatistics .Down ))
256- tikvUnhealthCounter .Set (float64 (tikvStatistics .Unhealthy ))
257- tikvOfflineCounter .Set (float64 (tikvStatistics .Offline ))
258- tikvTombstoneCounter .Set (float64 (tikvStatistics .Tombstone ))
259- tikvLowSpaceCounter .Set (float64 (tikvStatistics .LowSpace ))
260- tikvPreparingCounter .Set (float64 (tikvStatistics .Preparing ))
261- tikvServingCounter .Set (float64 (tikvStatistics .Serving ))
262- tikvRemovingCounter .Set (float64 (tikvStatistics .Removing ))
263- tikvRemovedCounter .Set (float64 (tikvStatistics .Removed ))
264- }
265-
266- // tiflash store status metrics.
267- tiflashStatistics , ok := s .engineStatistics [core .EngineTiFlash ]
268- if ok {
269- tiflashUpCounter .Set (float64 (tiflashStatistics .Up ))
270- tiflashDiconnectedCounter .Set (float64 (tiflashStatistics .Disconnect ))
271- tiflashDownCounter .Set (float64 (tiflashStatistics .Down ))
272- tiflashUnhealthCounter .Set (float64 (tiflashStatistics .Unhealthy ))
273- tiflashOfflineCounter .Set (float64 (tiflashStatistics .Offline ))
274- tiflashTombstoneCounter .Set (float64 (tiflashStatistics .Tombstone ))
275- tiflashLowSpaceCounter .Set (float64 (tiflashStatistics .LowSpace ))
276- tiflashPreparingCounter .Set (float64 (tiflashStatistics .Preparing ))
277- tiflashServingCounter .Set (float64 (tiflashStatistics .Serving ))
278- tiflashRemovingCounter .Set (float64 (tiflashStatistics .Removing ))
279- tiflashRemovedCounter .Set (float64 (tiflashStatistics .Removed ))
280- }
281-
282- // Store status metrics.
283- storeRegionCountGauge .Set (float64 (s .RegionCount ))
284- storeLeaderCountGauge .Set (float64 (s .LeaderCount ))
285- storeWitnessCountGauge .Set (float64 (s .WitnessCount ))
286- storeLearnerCountGauge .Set (float64 (s .LearnerCount ))
287- storeStorageSizeGauge .Set (float64 (s .StorageSize ))
288- storeStorageCapacityGauge .Set (float64 (s .StorageCapacity ))
289-
290220 // Current scheduling configurations of the cluster
291221 configs := make (map [string ]float64 )
292222 configs ["leader-schedule-limit" ] = float64 (s .opt .GetLeaderScheduleLimit ())
@@ -374,6 +304,7 @@ func ResetStoreStatistics(storeAddress string, id string) {
374304 for _ , m := range metrics {
375305 storeStatusGauge .DeleteLabelValues (storeAddress , id , m )
376306 }
307+ clusterStatusGauge .DeletePartialMatch (utils .SingleLabel ("store" , id ))
377308}
378309
379310type storeStatisticsMap struct {
@@ -403,44 +334,8 @@ func (m *storeStatisticsMap) Collect() {
403334func Reset () {
404335 storeStatusGauge .Reset ()
405336 placementStatusGauge .Reset ()
406- ResetClusterStatusMetrics ()
337+ clusterStatusGauge . Reset ()
407338 ResetRegionStatsMetrics ()
408339 ResetLabelStatsMetrics ()
409340 ResetHotCacheStatusMetrics ()
410341}
411-
412- // ResetClusterStatusMetrics resets the cluster status metrics.
413- func ResetClusterStatusMetrics () {
414- tikvUpCounter .Set (0 )
415- tikvDiconnectedCounter .Set (0 )
416- tikvDownCounter .Set (0 )
417- tikvUnhealthCounter .Set (0 )
418- tikvOfflineCounter .Set (0 )
419- tikvTombstoneCounter .Set (0 )
420- tikvLowSpaceCounter .Set (0 )
421- tikvPreparingCounter .Set (0 )
422- tikvServingCounter .Set (0 )
423- tikvRemovingCounter .Set (0 )
424- tikvRemovedCounter .Set (0 )
425-
426- // tiflash status counters.
427- tiflashUpCounter .Set (0 )
428- tiflashDiconnectedCounter .Set (0 )
429- tiflashDownCounter .Set (0 )
430- tiflashUnhealthCounter .Set (0 )
431- tiflashOfflineCounter .Set (0 )
432- tiflashTombstoneCounter .Set (0 )
433- tiflashLowSpaceCounter .Set (0 )
434- tiflashPreparingCounter .Set (0 )
435- tiflashServingCounter .Set (0 )
436- tiflashRemovingCounter .Set (0 )
437- tiflashRemovedCounter .Set (0 )
438-
439- // Store status metrics.
440- storeRegionCountGauge .Set (0 )
441- storeLeaderCountGauge .Set (0 )
442- storeWitnessCountGauge .Set (0 )
443- storeLearnerCountGauge .Set (0 )
444- storeStorageSizeGauge .Set (0 )
445- storeStorageCapacityGauge .Set (0 )
446- }
0 commit comments