Commit 86c8665

cdown authored and smb49 committed
mm, memcg: throttle allocators based on ancestral memory.high
BugLink: https://bugs.launchpad.net/bugs/1869061

commit e26733e upstream.

Prior to this commit, we only directly check the affected cgroup's memory.high against its usage. However, it's possible that we are being reclaimed as a result of hitting an ancestor memory.high and should be penalised based on that, instead.

This patch changes memory.high overage throttling to use the largest overage in its ancestors when considering how many penalty jiffies to charge. This makes sure that we penalise poorly behaving cgroups in the same way regardless of at what level of the hierarchy memory.high was breached.

Fixes: 0e4b01d ("mm, memcg: throttle allocators when failing reclaim over memory.high")
Reported-by: Johannes Weiner <[email protected]>
Signed-off-by: Chris Down <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Acked-by: Johannes Weiner <[email protected]>
Cc: Tejun Heo <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Nathan Chancellor <[email protected]>
Cc: Roman Gushchin <[email protected]>
Cc: <[email protected]> [5.4.x+]
Link: http://lkml.kernel.org/r/8cd132f84bd7e16cdb8fde3378cdbf05ba00d387.1584036142.git.chris@chrisdown.name
Signed-off-by: Linus Torvalds <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>
Signed-off-by: Kamal Mostafa <[email protected]>
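For context, a minimal user-space sketch of the throttling math this patch moves into calculate_high_delay(): each level of the hierarchy contributes an overage fraction of its own memory.high, the largest fraction wins, and the penalty grows quadratically with it. The constants below (MEMCG_DELAY_PRECISION_SHIFT of 20, HZ of 250, MEMCG_MAX_HIGH_DELAY_JIFFIES of 2*HZ) are assumptions standing in for kernel definitions outside the hunk shown here, and overage_of()/penalty_jiffies() are illustrative helpers, not kernel functions.

    /*
     * Stand-alone sketch of the ancestral overage throttling introduced by
     * this patch. Constants marked "assumed" mirror mm/memcontrol.c but are
     * not part of the hunk shown on this page.
     */
    #include <stdio.h>
    #include <stdint.h>

    #define MEMCG_DELAY_PRECISION_SHIFT   20           /* assumed */
    #define MEMCG_DELAY_SCALING_SHIFT     14           /* from the diff context */
    #define HZ                            250          /* assumed; config dependent */
    #define MEMCG_MAX_HIGH_DELAY_JIFFIES  (2UL * HZ)   /* assumed */

    /* Fixed-point overage of one cgroup level: (usage - high) / high, scaled by 2^20. */
    static uint64_t overage_of(unsigned long usage, unsigned long high)
    {
            if (high == 0)
                    high = 1;       /* mirror the division-by-zero guard */
            if (usage <= high)
                    return 0;       /* sketch-only guard, not part of this hunk */
            return ((uint64_t)(usage - high) << MEMCG_DELAY_PRECISION_SHIFT) / high;
    }

    /* Quadratic penalty from the largest overage seen while walking the ancestors. */
    static unsigned long penalty_jiffies(uint64_t max_overage)
    {
            uint64_t penalty = max_overage * max_overage * HZ;

            penalty >>= MEMCG_DELAY_PRECISION_SHIFT;
            penalty >>= MEMCG_DELAY_SCALING_SHIFT;
            return penalty < MEMCG_MAX_HIGH_DELAY_JIFFIES ?
                    (unsigned long)penalty : MEMCG_MAX_HIGH_DELAY_JIFFIES;
    }

    int main(void)
    {
            /* A child 10% over its own memory.high, under a parent 50% over its own. */
            uint64_t child  = overage_of(110, 100);
            uint64_t parent = overage_of(150, 100);
            uint64_t worst  = parent > child ? parent : child;

            /* Before the patch only the child's own overage mattered; now the
             * worst overage anywhere up the hierarchy sets the delay. */
            printf("child-only penalty: %lu jiffies\n", penalty_jiffies(child));
            printf("ancestral penalty : %lu jiffies\n", penalty_jiffies(worst));
            return 0;
    }

With these example numbers the child-only overage yields a modest delay, while the ancestral overage saturates at the clamped maximum, which is the behavioural change the commit message describes.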
1 parent e68fd6c commit 86c8665

1 file changed (+58, -35 lines)

mm/memcontrol.c

Lines changed: 58 additions & 35 deletions
@@ -2414,53 +2414,51 @@ static void high_work_func(struct work_struct *work)
 #define MEMCG_DELAY_SCALING_SHIFT 14

 /*
- * Scheduled by try_charge() to be executed from the userland return path
- * and reclaims memory over the high limit.
+ * Get the number of jiffies that we should penalise a mischievous cgroup which
+ * is exceeding its memory.high by checking both it and its ancestors.
  */
-void mem_cgroup_handle_over_high(void)
+static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
+                                          unsigned int nr_pages)
 {
-        unsigned long usage, high, clamped_high;
-        unsigned long pflags;
-        unsigned long penalty_jiffies, overage;
-        unsigned int nr_pages = current->memcg_nr_pages_over_high;
-        struct mem_cgroup *memcg;
+        unsigned long penalty_jiffies;
+        u64 max_overage = 0;

-        if (likely(!nr_pages))
-                return;
+        do {
+                unsigned long usage, high;
+                u64 overage;

-        memcg = get_mem_cgroup_from_mm(current->mm);
-        reclaim_high(memcg, nr_pages, GFP_KERNEL);
-        current->memcg_nr_pages_over_high = 0;
+                usage = page_counter_read(&memcg->memory);
+                high = READ_ONCE(memcg->high);
+
+                /*
+                 * Prevent division by 0 in overage calculation by acting as if
+                 * it was a threshold of 1 page
+                 */
+                high = max(high, 1UL);
+
+                overage = usage - high;
+                overage <<= MEMCG_DELAY_PRECISION_SHIFT;
+                overage = div64_u64(overage, high);
+
+                if (overage > max_overage)
+                        max_overage = overage;
+        } while ((memcg = parent_mem_cgroup(memcg)) &&
+                 !mem_cgroup_is_root(memcg));
+
+        if (!max_overage)
+                return 0;

         /*
-         * memory.high is breached and reclaim is unable to keep up. Throttle
-         * allocators proactively to slow down excessive growth.
-         *
          * We use overage compared to memory.high to calculate the number of
          * jiffies to sleep (penalty_jiffies). Ideally this value should be
          * fairly lenient on small overages, and increasingly harsh when the
          * memcg in question makes it clear that it has no intention of stopping
          * its crazy behaviour, so we exponentially increase the delay based on
          * overage amount.
          */
-
-        usage = page_counter_read(&memcg->memory);
-        high = READ_ONCE(memcg->high);
-
-        if (usage <= high)
-                goto out;
-
-        /*
-         * Prevent division by 0 in overage calculation by acting as if it was a
-         * threshold of 1 page
-         */
-        clamped_high = max(high, 1UL);
-
-        overage = div64_u64((u64)(usage - high) << MEMCG_DELAY_PRECISION_SHIFT,
-                            clamped_high);
-
-        penalty_jiffies = ((u64)overage * overage * HZ)
-                >> (MEMCG_DELAY_PRECISION_SHIFT + MEMCG_DELAY_SCALING_SHIFT);
+        penalty_jiffies = max_overage * max_overage * HZ;
+        penalty_jiffies >>= MEMCG_DELAY_PRECISION_SHIFT;
+        penalty_jiffies >>= MEMCG_DELAY_SCALING_SHIFT;

         /*
          * Factor in the task's own contribution to the overage, such that four
@@ -2477,7 +2475,32 @@ void mem_cgroup_handle_over_high(void)
          * application moving forwards and also permit diagnostics, albeit
          * extremely slowly.
          */
-        penalty_jiffies = min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
+        return min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
+}
+
+/*
+ * Scheduled by try_charge() to be executed from the userland return path
+ * and reclaims memory over the high limit.
+ */
+void mem_cgroup_handle_over_high(void)
+{
+        unsigned long penalty_jiffies;
+        unsigned long pflags;
+        unsigned int nr_pages = current->memcg_nr_pages_over_high;
+        struct mem_cgroup *memcg;
+
+        if (likely(!nr_pages))
+                return;
+
+        memcg = get_mem_cgroup_from_mm(current->mm);
+        reclaim_high(memcg, nr_pages, GFP_KERNEL);
+        current->memcg_nr_pages_over_high = 0;
+
+        /*
+         * memory.high is breached and reclaim is unable to keep up. Throttle
+         * allocators proactively to slow down excessive growth.
+         */
+        penalty_jiffies = calculate_high_delay(memcg, nr_pages);

         /*
          * Don't sleep if the amount of jiffies this memcg owes us is so low
