@@ -64,6 +64,7 @@ func (m *KernelMonitor) Register(ctx context.Context, mgr monitor.Manager) error
6464 util .NewChannelHandler (func (time.Time ) error { return m .handleZombies () }, util .TimeTickWithJitterContext (ctx , 5 * time .Minute )),
6565 util .NewChannelHandler (func (time.Time ) error { return m .handleOpenedFiles () }, util .TimeTickWithJitterContext (ctx , 5 * time .Minute )),
6666 util .NewChannelHandler (func (time.Time ) error { return m .handleEnvironment () }, util .TimeTickWithJitterContext (ctx , 5 * time .Minute )),
67+ util .NewChannelHandler (func (time.Time ) error { return m .handleZram () }, util .TimeTickWithJitterContext (ctx , 5 * time .Minute )),
6768 } {
6869 go handler .Start (ctx )
6970 }
@@ -349,3 +350,61 @@ func (k *KernelMonitor) checkEnvironment(envBytes []byte, pid int) error {
349350 }
350351 return nil
351352}
353+
354+ // ~~~~ zram ~~~~
355+
356+ func (k * KernelMonitor ) handleZram () error {
357+ zramDirs , err := filepath .Glob (config .ToHostPath ("/sys/block/zram*" ))
358+ if err != nil {
359+ return err
360+ }
361+ if len (zramDirs ) == 0 {
362+ k .logger .V (1 ).Info ("ZRAM devices not found on this node" )
363+ return nil
364+ }
365+ for _ , dir := range zramDirs {
366+ deviceName := filepath .Base (dir )
367+ mmStatData , err := os .ReadFile (filepath .Join (dir , "mm_stat" ))
368+ if err != nil {
369+ k .logger .V (1 ).Info ("failed to read ZRAM mm_stat" , "device" , deviceName , "error" , err )
370+ continue
371+ }
372+ fields := strings .Fields (string (mmStatData ))
373+ // mm_stat format: orig_data_size compr_data_size mem_used_total ...
374+ // We need at least the first 2 fields
375+ if len (fields ) < 2 {
376+ k .logger .V (1 ).Info ("invalid ZRAM mm_stat format" , "device" , deviceName , "fields" , len (fields ))
377+ continue
378+ }
379+ origSize , _ := strconv .ParseInt (fields [0 ], 10 , 64 )
380+ compSize , _ := strconv .ParseInt (fields [1 ], 10 , 64 )
381+ if origSize == 0 {
382+ continue
383+ }
384+ disksizeData , err := os .ReadFile (filepath .Join (dir , "disksize" ))
385+ if err != nil {
386+ continue
387+ }
388+ disksize , _ := strconv .ParseInt (strings .TrimSpace (string (disksizeData )), 10 , 64 )
389+ if err := k .checkZram (origSize , compSize , disksize , deviceName ); err != nil {
390+ return err
391+ }
392+ }
393+ return nil
394+ }
395+
396+ func (k * KernelMonitor ) checkZram (origSize , compSize , disksize int64 , deviceName string ) error {
397+ if disksize == 0 {
398+ return nil
399+ }
400+ usagePercent := float64 (origSize ) / float64 (disksize )
401+ if usagePercent > 0.10 {
402+ return k .manager .Notify (context .Background (), monitor.Condition {
403+ Reason : "ZramHighUsage" ,
404+ Message : fmt .Sprintf ("ZRAM device %s at %.1f%% capacity" , deviceName , usagePercent * 100 ),
405+ Severity : monitor .SeverityWarning ,
406+ })
407+ }
408+ return nil
409+
410+ }
0 commit comments