Skip to content

Commit f0e001b

Browse files
localai-bot and mudler authored
fix(xsysinfo): container-aware total RAM detection (cgroup/lxcfs) (#8059) (#10288)
fix(xsysinfo): make reported system RAM total cgroup/lxcfs-aware (#8059) GetSystemRAMInfo derived Total from memory.TotalMemory(), which on Linux uses syscall.Sysinfo().Totalram - the HOST kernel total. lxcfs/LXD does NOT virtualize that value, while MemAvailable (used for Free/Available) IS virtualized. Inside an LXD/container with a 128Gi host but a ~10Gi container view this produced Total=128Gi, Available=10Gi => Used=118Gi, reporting ~92% RAM usage on an idle container. Derive Total instead from the minimum of all non-zero, non-unlimited candidates: cgroup v2 memory.max, cgroup v1 memory.limit_in_bytes (the kernel unlimited sentinel is ignored), /proc/meminfo MemTotal (which lxcfs virtualizes), and the syscall.Sysinfo total as the bare-metal fallback. On bare metal every candidate is unlimited or equals the host total, so behavior is unchanged. The selection/parsing lives in a pure function chooseTotalMemory(...) taking file CONTENTS, unit-tested without a real LXD host; OS file reads stay in a thin wrapper. Assisted-by: claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
1 parent cf9debf commit f0e001b

3 files changed

Lines changed: 242 additions & 5 deletions

File tree

pkg/xsysinfo/memory.go

Lines changed: 48 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,19 @@
11
package xsysinfo
22

33
import (
4+
"os"
5+
46
"github.com/mudler/memory"
57
)
68

9+
// Locations of the cgroup and proc files consulted when computing a
// container-aware RAM total. They are declared with var rather than const so
// that tests could point them somewhere else if needed.
var (
	// procMemInfoPath is the meminfo file whose MemTotal line is parsed.
	procMemInfoPath = "/proc/meminfo"
	// cgroupV1LimitPath is the cgroup v1 memory limit file.
	cgroupV1LimitPath = "/sys/fs/cgroup/memory/memory.limit_in_bytes"
	// cgroupV2MaxPath is the cgroup v2 memory limit file.
	cgroupV2MaxPath = "/sys/fs/cgroup/memory.max"
)
16+
717
// SystemRAMInfo contains system RAM usage information
818
type SystemRAMInfo struct {
919
Total uint64 `json:"total"`
@@ -13,12 +23,45 @@ type SystemRAMInfo struct {
1323
UsagePercent float64 `json:"usage_percent"`
1424
}
1525

26+
// readFileBestEffort returns the full contents of the file at path as a
// string. Any read error yields the empty string instead of being reported:
// the cgroup/proc files this is used for are expected to be absent on some
// hosts (e.g. non-Linux), and callers treat "" as "no information".
func readFileBestEffort(path string) string {
	if data, err := os.ReadFile(path); err == nil {
		return string(data)
	}
	return ""
}
35+
36+
// systemTotalMemory returns the container-aware total system RAM in bytes.
37+
//
38+
// memory.TotalMemory() reports the HOST kernel total (syscall.Sysinfo on
39+
// Linux), which lxcfs/LXD does NOT virtualize. Inside a container that
40+
// over-reports physical RAM and, combined with the virtualized MemAvailable,
41+
// inflates the reported usage (see issue #8059). We instead derive the total
42+
// from the minimum of all available container-aware candidates.
43+
func systemTotalMemory() uint64 {
44+
return chooseTotalMemory(
45+
readFileBestEffort(cgroupV2MaxPath),
46+
readFileBestEffort(cgroupV1LimitPath),
47+
readFileBestEffort(procMemInfoPath),
48+
memory.TotalMemory(),
49+
)
50+
}
51+
1652
// GetSystemRAMInfo returns real-time system RAM usage
1753
func GetSystemRAMInfo() (*SystemRAMInfo, error) {
18-
total := memory.TotalMemory()
19-
free := memory.AvailableMemory()
54+
total := systemTotalMemory()
55+
available := memory.AvailableMemory()
56+
57+
// AvailableMemory (MemAvailable) is virtualized by lxcfs, so in edge
58+
// cases it can exceed our corrected total; clamp to avoid an unsigned
59+
// underflow when computing Used.
60+
if available > total {
61+
available = total
62+
}
2063

21-
used := total - free
64+
used := total - available
2265

2366
usagePercent := 0.0
2467
if total > 0 {
@@ -27,8 +70,8 @@ func GetSystemRAMInfo() (*SystemRAMInfo, error) {
2770
return &SystemRAMInfo{
2871
Total: total,
2972
Used: used,
30-
Free: free,
31-
Available: total - used,
73+
Free: available,
74+
Available: available,
3275
UsagePercent: usagePercent,
3376
}, nil
3477
}

pkg/xsysinfo/memory_total.go

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
package xsysinfo
2+
3+
import (
4+
"strconv"
5+
"strings"
6+
)
7+
8+
// cgroupV1UnlimitedSentinel is what the kernel writes to cgroup v1
// memory.limit_in_bytes when no limit is configured: PAGE_COUNTER_MAX, i.e.
// LONG_MAX (1<<63 - 1) rounded down to a page boundary. With 4 KiB pages the
// low 12 bits are cleared, giving 0x7FFFFFFFFFFFF000. Any value at or above
// this is treated as "no limit".
const cgroupV1UnlimitedSentinel uint64 = (1<<63 - 1) &^ (1<<12 - 1)
13+
14+
// parseUintField reads a base-10 unsigned integer from raw file contents,
// ignoring surrounding whitespace (such as the trailing newline).
// The boolean result is false, and the value 0, whenever the trimmed content
// is empty, is not a number, or does not fit in 64 bits.
func parseUintField(raw string) (uint64, bool) {
	// ParseUint rejects the empty string, so no separate emptiness check
	// is needed after trimming.
	if n, err := strconv.ParseUint(strings.TrimSpace(raw), 10, 64); err == nil {
		return n, true
	}
	return 0, false
}
27+
28+
// parseCgroupV2Max interprets the contents of cgroup v2 memory.max.
29+
// The literal "max" means unlimited, returning 0.
30+
func parseCgroupV2Max(raw string) uint64 {
31+
if strings.TrimSpace(raw) == "max" {
32+
return 0
33+
}
34+
v, ok := parseUintField(raw)
35+
if !ok {
36+
return 0
37+
}
38+
return v
39+
}
40+
41+
// parseCgroupV1Limit interprets the contents of cgroup v1
42+
// memory.limit_in_bytes. The kernel's "unlimited" sentinel (a value at or
43+
// above PAGE_COUNTER_MAX) is treated as no limit, returning 0.
44+
func parseCgroupV1Limit(raw string) uint64 {
45+
v, ok := parseUintField(raw)
46+
if !ok {
47+
return 0
48+
}
49+
if v >= cgroupV1UnlimitedSentinel {
50+
return 0
51+
}
52+
return v
53+
}
54+
55+
// parseMemTotal extracts the MemTotal value, in bytes, from raw
// /proc/meminfo contents.
//
// Only the first line starting with "MemTotal:" is considered. The number is
// scaled by the unit suffix that follows it (matched case-insensitively):
// "kB" => x1024, "MB" => x1024^2, "GB" => x1024^3. A missing or unrecognised
// suffix leaves the number unscaled.
//
// It returns 0 ("unknown") when the field is missing, when the value is not
// an unsigned integer, or when the scaled value would not fit in a uint64.
// The overflow case must not wrap around: a wrapped result would be a small
// non-zero number that a minimum-based selection would wrongly prefer.
func parseMemTotal(raw string) uint64 {
	for _, line := range strings.Split(raw, "\n") {
		if !strings.HasPrefix(line, "MemTotal:") {
			continue
		}
		// Expected: ["MemTotal:", "<value>", "kB"]
		fields := strings.Fields(line)
		if len(fields) < 2 {
			return 0
		}
		v, err := strconv.ParseUint(fields[1], 10, 64)
		if err != nil {
			return 0
		}

		// Express the unit as a power-of-two shift so the overflow check
		// below is a single comparison.
		var shift uint
		if len(fields) >= 3 {
			switch strings.ToLower(fields[2]) {
			case "kb":
				shift = 10
			case "mb":
				shift = 20
			case "gb":
				shift = 30
			}
		}
		if v > ^uint64(0)>>shift {
			// Scaling would overflow uint64; report "unknown".
			return 0
		}
		return v << shift
	}
	return 0
}
86+
87+
// chooseTotalMemory selects the most accurate system RAM total in bytes.
88+
//
89+
// On Linux the host kernel total (sysinfoTotal, from syscall.Sysinfo) is NOT
90+
// virtualized by lxcfs/LXD, so inside a container it over-reports physical
91+
// RAM. The cgroup limits and /proc/meminfo MemTotal, by contrast, do reflect
92+
// the container's view. We therefore take the MINIMUM of all non-zero,
93+
// non-unlimited candidates:
94+
//
95+
// - cgroup v2 memory.max ("max" => unlimited, skipped)
96+
// - cgroup v1 memory.limit_in_bytes (kernel sentinel => unlimited, skipped)
97+
// - /proc/meminfo MemTotal (lxcfs/LXD virtualizes this)
98+
// - sysinfoTotal (bare-metal fallback)
99+
//
100+
// On bare metal the cgroup limits are unlimited and MemTotal == sysinfoTotal,
101+
// so the result equals the host total exactly as before.
102+
func chooseTotalMemory(cgroupV2Max, cgroupV1Limit, procMemInfo string, sysinfoTotal uint64) uint64 {
103+
candidates := []uint64{
104+
parseCgroupV2Max(cgroupV2Max),
105+
parseCgroupV1Limit(cgroupV1Limit),
106+
parseMemTotal(procMemInfo),
107+
sysinfoTotal,
108+
}
109+
110+
var best uint64
111+
for _, c := range candidates {
112+
if c == 0 {
113+
continue
114+
}
115+
if best == 0 || c < best {
116+
best = c
117+
}
118+
}
119+
return best
120+
}

pkg/xsysinfo/memory_total_test.go

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
package xsysinfo
2+
3+
import (
4+
. "github.com/onsi/ginkgo/v2"
5+
. "github.com/onsi/gomega"
6+
)
7+
8+
var _ = Describe("chooseTotalMemory", func() {
9+
const (
10+
gi128 = uint64(128) * 1024 * 1024 * 1024
11+
gi20 = uint64(20) * 1024 * 1024 * 1024
12+
gi10 = uint64(10) * 1024 * 1024 * 1024
13+
)
14+
15+
// /proc/meminfo MemTotal is in kB; build a snippet for a given byte total.
16+
memInfo := func(bytes uint64) string {
17+
kb := bytes / 1024
18+
return "MemTotal: " + itoa(kb) + " kB\nMemFree: 123 kB\n"
19+
}
20+
21+
Context("bare metal (no cgroup cap, memory.max == max)", func() {
22+
It("uses the host sysinfo total", func() {
23+
// MemTotal mirrors sysinfo on bare metal.
24+
got := chooseTotalMemory("max\n", string(rune(0)), memInfo(gi128), gi128)
25+
Expect(got).To(Equal(gi128))
26+
})
27+
})
28+
29+
Context("LXD/lxcfs container (MemTotal virtualized below host, no cap)", func() {
30+
It("uses the virtualized MemTotal, not the host sysinfo total", func() {
31+
// This is issue #8059: host sysinfo says 128Gi, but lxcfs
32+
// virtualizes /proc/meminfo MemTotal to 20Gi and there is no
33+
// cgroup cap. The corrected total must be 20Gi.
34+
got := chooseTotalMemory("max\n", "", memInfo(gi20), gi128)
35+
Expect(got).To(Equal(gi20))
36+
})
37+
})
38+
39+
Context("cgroup v2 cap set below MemTotal", func() {
40+
It("uses the cgroup cap", func() {
41+
got := chooseTotalMemory(itoa(gi10)+"\n", "", memInfo(gi20), gi128)
42+
Expect(got).To(Equal(gi10))
43+
})
44+
})
45+
46+
Context("cgroup v1 with the kernel unlimited sentinel", func() {
47+
It("ignores the sentinel and falls back to MemTotal", func() {
48+
got := chooseTotalMemory("", "9223372036854771712\n", memInfo(gi20), gi128)
49+
Expect(got).To(Equal(gi20))
50+
})
51+
})
52+
53+
Context("all candidates empty/unlimited", func() {
54+
It("falls back to sysinfo total", func() {
55+
got := chooseTotalMemory("max\n", "", "", gi128)
56+
Expect(got).To(Equal(gi128))
57+
})
58+
})
59+
})
60+
61+
// itoa formats v in base 10. It exists only so the test file does not need
// to import strconv.
func itoa(v uint64) string {
	// 20 digits is enough for the largest uint64 (18446744073709551615).
	const maxDigits = 20
	var digits [maxDigits]byte

	// Fill the buffer from the right, least-significant digit first. The
	// loop body always runs once, so v == 0 yields "0" with no special case.
	pos := maxDigits
	for {
		pos--
		digits[pos] = byte('0' + v%10)
		v /= 10
		if v == 0 {
			break
		}
	}
	return string(digits[pos:])
}

0 commit comments

Comments
 (0)