Skip to content

Commit 004ed42

Browse files
danieljordan10 authored and torvalds committed
padata: add basic support for multithreaded jobs
Sometimes the kernel doesn't take full advantage of system memory bandwidth, leading to a single CPU spending excessive time in initialization paths where the data scales with memory size. Multithreading naturally addresses this problem. Extend padata, a framework that handles many parallel yet singlethreaded jobs, to also handle multithreaded jobs by adding support for splitting up the work evenly, specifying a minimum amount of work that's appropriate for one helper thread to do, load balancing between helpers, and coordinating them. This is inspired by work from Pavel Tatashin and Steve Sistare. Signed-off-by: Daniel Jordan <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Tested-by: Josh Triplett <[email protected]> Cc: Alexander Duyck <[email protected]> Cc: Alex Williamson <[email protected]> Cc: Dan Williams <[email protected]> Cc: Dave Hansen <[email protected]> Cc: David Hildenbrand <[email protected]> Cc: Herbert Xu <[email protected]> Cc: Jason Gunthorpe <[email protected]> Cc: Jonathan Corbet <[email protected]> Cc: Kirill Tkhai <[email protected]> Cc: Michal Hocko <[email protected]> Cc: Pavel Machek <[email protected]> Cc: Pavel Tatashin <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Randy Dunlap <[email protected]> Cc: Robert Elliott <[email protected]> Cc: Shile Zhang <[email protected]> Cc: Steffen Klassert <[email protected]> Cc: Steven Sistare <[email protected]> Cc: Tejun Heo <[email protected]> Cc: Zi Yan <[email protected]> Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Linus Torvalds <[email protected]>
1 parent 4611ce2 commit 004ed42

File tree

2 files changed

+178
-3
lines changed

2 files changed

+178
-3
lines changed

include/linux/padata.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
*
55
* Copyright (C) 2008, 2009 secunet Security Networks AG
66
* Copyright (C) 2008, 2009 Steffen Klassert <[email protected]>
7+
*
8+
* Copyright (c) 2020 Oracle and/or its affiliates.
9+
* Author: Daniel Jordan <[email protected]>
710
*/
811

912
#ifndef PADATA_H
@@ -130,6 +133,31 @@ struct padata_shell {
130133
struct list_head list;
131134
};
132135

136+
/**
 * struct padata_mt_job - represents one multithreaded job
 *
 * @thread_fn: Called for each chunk of work that a padata thread does.  It
 *             receives the half-open range [start, end) of the chunk and
 *             @fn_arg.
 * @fn_arg: The thread function argument.
 * @start: The start of the job (units are job-specific).
 * @size: size of this node's work (units are job-specific).
 * @align: Ranges passed to the thread function fall on this boundary, with the
 *         possible exceptions of the beginning and end of the job.
 * @min_chunk: The minimum chunk size in job-specific units.  This allows
 *             the client to communicate the minimum amount of work that's
 *             appropriate for one worker thread to do at once.
 * @max_threads: Max threads to use for the job, actual number may be less
 *               depending on task size and minimum chunk size.
 */
struct padata_mt_job {
	void (*thread_fn)(unsigned long start, unsigned long end, void *arg);
	void			*fn_arg;
	unsigned long		start;
	unsigned long		size;
	unsigned long		align;
	unsigned long		min_chunk;
	int			max_threads;
};
160+
133161
/**
134162
* struct padata_instance - The overall control structure.
135163
*
@@ -173,6 +201,7 @@ extern void padata_free_shell(struct padata_shell *ps);
173201
extern int padata_do_parallel(struct padata_shell *ps,
174202
struct padata_priv *padata, int *cb_cpu);
175203
extern void padata_do_serial(struct padata_priv *padata);
204+
extern void __init padata_do_multithreaded(struct padata_mt_job *job);
176205
extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
177206
cpumask_var_t cpumask);
178207
extern int padata_start(struct padata_instance *pinst);

kernel/padata.c

Lines changed: 149 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77
* Copyright (C) 2008, 2009 secunet Security Networks AG
88
* Copyright (C) 2008, 2009 Steffen Klassert <[email protected]>
99
*
10+
* Copyright (c) 2020 Oracle and/or its affiliates.
11+
* Author: Daniel Jordan <[email protected]>
12+
*
1013
* This program is free software; you can redistribute it and/or modify it
1114
* under the terms and conditions of the GNU General Public License,
1215
* version 2, as published by the Free Software Foundation.
@@ -21,6 +24,7 @@
2124
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
2225
*/
2326

27+
#include <linux/completion.h>
2428
#include <linux/export.h>
2529
#include <linux/cpumask.h>
2630
#include <linux/err.h>
@@ -32,6 +36,8 @@
3236
#include <linux/sysfs.h>
3337
#include <linux/rcupdate.h>
3438

39+
#define PADATA_WORK_ONSTACK 1 /* Work's memory is on stack */
40+
3541
struct padata_work {
3642
struct work_struct pw_work;
3743
struct list_head pw_list; /* padata_free_works linkage */
@@ -42,7 +48,17 @@ static DEFINE_SPINLOCK(padata_works_lock);
4248
static struct padata_work *padata_works;
4349
static LIST_HEAD(padata_free_works);
4450

51+
struct padata_mt_job_state {
52+
spinlock_t lock;
53+
struct completion completion;
54+
struct padata_mt_job *job;
55+
int nworks;
56+
int nworks_fini;
57+
unsigned long chunk_size;
58+
};
59+
4560
static void padata_free_pd(struct parallel_data *pd);
61+
static void __init padata_mt_helper(struct work_struct *work);
4662

4763
static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
4864
{
@@ -81,18 +97,56 @@ static struct padata_work *padata_work_alloc(void)
8197
}
8298

8399
static void padata_work_init(struct padata_work *pw, work_func_t work_fn,
84-
void *data)
100+
void *data, int flags)
85101
{
86-
INIT_WORK(&pw->pw_work, work_fn);
102+
if (flags & PADATA_WORK_ONSTACK)
103+
INIT_WORK_ONSTACK(&pw->pw_work, work_fn);
104+
else
105+
INIT_WORK(&pw->pw_work, work_fn);
87106
pw->pw_data = data;
88107
}
89108

109+
/*
 * Allocate and initialize up to @nworks - 1 helper works for a multithreaded
 * job and add them to @head.  Each work runs padata_mt_helper() with @data as
 * its argument.
 *
 * Returns the number of threads that will take part in the job, counting the
 * current task.  This is less than @nworks when padata_work_alloc() runs out
 * of works, and is 1 when no helper work could be (or needed to be) allocated.
 */
static int __init padata_work_alloc_mt(int nworks, void *data,
				       struct list_head *head)
{
	int i;

	spin_lock(&padata_works_lock);
	/* Start at 1 because the current task participates in the job. */
	for (i = 1; i < nworks; ++i) {
		struct padata_work *pw = padata_work_alloc();

		/* Out of works: make do with the helpers allocated so far. */
		if (!pw)
			break;
		padata_work_init(pw, padata_mt_helper, data, 0);
		list_add(&pw->pw_list, head);
	}
	spin_unlock(&padata_works_lock);

	return i;
}
128+
90129
static void padata_work_free(struct padata_work *pw)
91130
{
92131
lockdep_assert_held(&padata_works_lock);
93132
list_add(&pw->pw_list, &padata_free_works);
94133
}
95134

135+
/*
 * Unlink every work on @works and hand it back to the free list.  Takes
 * padata_works_lock itself; an empty list is a no-op that skips the lock.
 */
static void __init padata_works_free(struct list_head *works)
{
	struct padata_work *cur, *next;

	if (list_empty(works))
		return;

	spin_lock(&padata_works_lock);
	/* The _safe iterator is needed because each entry is unlinked in turn. */
	list_for_each_entry_safe(cur, next, works, pw_list) {
		list_del(&cur->pw_list);
		padata_work_free(cur);
	}
	spin_unlock(&padata_works_lock);
}
149+
96150
static void padata_parallel_worker(struct work_struct *parallel_work)
97151
{
98152
struct padata_work *pw = container_of(parallel_work, struct padata_work,
@@ -168,7 +222,7 @@ int padata_do_parallel(struct padata_shell *ps,
168222
pw = padata_work_alloc();
169223
spin_unlock(&padata_works_lock);
170224
if (pw) {
171-
padata_work_init(pw, padata_parallel_worker, padata);
225+
padata_work_init(pw, padata_parallel_worker, padata, 0);
172226
queue_work(pinst->parallel_wq, &pw->pw_work);
173227
} else {
174228
/* Maximum works limit exceeded, run in the current task. */
@@ -409,6 +463,98 @@ static int pd_setup_cpumasks(struct parallel_data *pd,
409463
return err;
410464
}
411465

466+
/*
 * Work function for one participant in a multithreaded job.
 *
 * Repeatedly claims the next chunk of the job under ps->lock, drops the lock,
 * and runs the job's thread function on that chunk, until no work remains.
 * The helper that finishes last signals ps->completion.
 *
 * ps->chunk_size must be nonzero: it is used as the divisor in roundup().
 */
static void __init padata_mt_helper(struct work_struct *w)
{
	struct padata_work *pw = container_of(w, struct padata_work, pw_work);
	struct padata_mt_job_state *ps = pw->pw_data;
	struct padata_mt_job *job = ps->job;
	bool done;

	spin_lock(&ps->lock);

	while (job->size > 0) {
		unsigned long start, size, end;

		start = job->start;
		/* So end is chunk size aligned if enough work remains. */
		size = roundup(start + 1, ps->chunk_size) - start;
		size = min(size, job->size);
		end = start + size;

		/* Claim [start, end) before letting other helpers in. */
		job->start = end;
		job->size -= size;

		/* Run the client's function without holding the lock. */
		spin_unlock(&ps->lock);
		job->thread_fn(start, end, job->fn_arg);
		spin_lock(&ps->lock);
	}

	++ps->nworks_fini;
	done = (ps->nworks_fini == ps->nworks);
	spin_unlock(&ps->lock);

	if (done)
		complete(&ps->completion);
}
499+
500+
/**
501+
* padata_do_multithreaded - run a multithreaded job
502+
* @job: Description of the job.
503+
*
504+
* See the definition of struct padata_mt_job for more details.
505+
*/
506+
void __init padata_do_multithreaded(struct padata_mt_job *job)
507+
{
508+
/* In case threads finish at different times. */
509+
static const unsigned long load_balance_factor = 4;
510+
struct padata_work my_work, *pw;
511+
struct padata_mt_job_state ps;
512+
LIST_HEAD(works);
513+
int nworks;
514+
515+
if (job->size == 0)
516+
return;
517+
518+
/* Ensure at least one thread when size < min_chunk. */
519+
nworks = max(job->size / job->min_chunk, 1ul);
520+
nworks = min(nworks, job->max_threads);
521+
522+
if (nworks == 1) {
523+
/* Single thread, no coordination needed, cut to the chase. */
524+
job->thread_fn(job->start, job->start + job->size, job->fn_arg);
525+
return;
526+
}
527+
528+
spin_lock_init(&ps.lock);
529+
init_completion(&ps.completion);
530+
ps.job = job;
531+
ps.nworks = padata_work_alloc_mt(nworks, &ps, &works);
532+
ps.nworks_fini = 0;
533+
534+
/*
535+
* Chunk size is the amount of work a helper does per call to the
536+
* thread function. Load balance large jobs between threads by
537+
* increasing the number of chunks, guarantee at least the minimum
538+
* chunk size from the caller, and honor the caller's alignment.
539+
*/
540+
ps.chunk_size = job->size / (ps.nworks * load_balance_factor);
541+
ps.chunk_size = max(ps.chunk_size, job->min_chunk);
542+
ps.chunk_size = roundup(ps.chunk_size, job->align);
543+
544+
list_for_each_entry(pw, &works, pw_list)
545+
queue_work(system_unbound_wq, &pw->pw_work);
546+
547+
/* Use the current thread, which saves starting a workqueue worker. */
548+
padata_work_init(&my_work, padata_mt_helper, &ps, PADATA_WORK_ONSTACK);
549+
padata_mt_helper(&my_work.pw_work);
550+
551+
/* Wait for all the helpers to finish. */
552+
wait_for_completion(&ps.completion);
553+
554+
destroy_work_on_stack(&my_work.pw_work);
555+
padata_works_free(&works);
556+
}
557+
412558
static void __padata_list_init(struct padata_list *pd_list)
413559
{
414560
INIT_LIST_HEAD(&pd_list->list);

0 commit comments

Comments
 (0)