Commit 5bed49a

Merge tag 'for-4.19/post-20180822' of git://git.kernel.dk/linux-block
Pull more block updates from Jens Axboe:

 - Set of bcache fixes and changes (Coly)
 - The flush warn fix (me)
 - Small series of BFQ fixes (Paolo)
 - wbt hang fix (Ming)
 - blktrace fix (Steven)
 - blk-mq hardware queue count update fix (Jianchao)
 - Various little fixes

* tag 'for-4.19/post-20180822' of git://git.kernel.dk/linux-block: (31 commits)
  block/DAC960.c: make some arrays static const, shrinks object size
  blk-mq: sync the update nr_hw_queues with blk_mq_queue_tag_busy_iter
  blk-mq: init hctx sched after update ctx and hctx mapping
  block: remove duplicate initialization
  tracing/blktrace: Fix to allow setting same value
  pktcdvd: fix setting of 'ret' error return for a few cases
  block: change return type to bool
  block, bfq: return nbytes and not zero from struct cftype .write() method
  block, bfq: improve code of bfq_bfqq_charge_time
  block, bfq: reduce write overcharge
  block, bfq: always update the budget of an entity when needed
  block, bfq: readd missing reset of parent-entity service
  blk-wbt: fix IO hang in wbt_wait()
  block: don't warn for flush on read-only device
  bcache: add the missing comments for smp_mb()/smp_wmb()
  bcache: remove unnecessary space before ioctl function pointer arguments
  bcache: add missing SPDX header
  bcache: move open brace at end of function definitions to next line
  bcache: add static const prefix to char * array declarations
  bcache: fix code comments style
  ...
2 parents: fe6f0ed + 1e7da86, commit 5bed49a

43 files changed: 879 additions, 656 deletions (large commit; only part of the diff is shown below)

block/bfq-cgroup.c

Lines changed: 2 additions & 1 deletion
@@ -913,7 +913,8 @@ static ssize_t bfq_io_set_weight(struct kernfs_open_file *of,
 	if (ret)
 		return ret;
 
-	return bfq_io_set_weight_legacy(of_css(of), NULL, weight);
+	ret = bfq_io_set_weight_legacy(of_css(of), NULL, weight);
+	return ret ?: nbytes;
 }
 
 #ifdef CONFIG_DEBUG_BLK_CGROUP
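
The one-line fix relies on the GNU `?:` extension: `ret ?: nbytes` evaluates to ret when it is non-zero (an error code) and to nbytes otherwise, which is what a kernfs/cgroup write handler is expected to return on success, namely the number of bytes it consumed. Below is a minimal user-space sketch of that return convention; set_weight is a hypothetical stand-in, not the kernel function, and the two-operand `?:` needs GCC or Clang.

#include <stdio.h>
#include <string.h>

/* Hypothetical stand-in for a kernfs-style write handler: on success it
 * must return the number of bytes it consumed, on failure a negative
 * error code. */
static long set_weight(const char *buf, size_t nbytes)
{
	long weight;
	int ret = sscanf(buf, "%ld", &weight) == 1 ? 0 : -22; /* -EINVAL */

	/* GNU two-operand ?: — ret if non-zero, otherwise nbytes. */
	return ret ?: (long)nbytes;
}

int main(void)
{
	const char *input = "100\n";

	printf("returned %ld (expected %zu)\n",
	       set_weight(input, strlen(input)), strlen(input));
	return 0;
}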

block/bfq-iosched.c

Lines changed: 40 additions & 14 deletions
@@ -187,11 +187,25 @@ static const int bfq_stats_min_budgets = 194;
 static const int bfq_default_max_budget = 16 * 1024;
 
 /*
- * Async to sync throughput distribution is controlled as follows:
- * when an async request is served, the entity is charged the number
- * of sectors of the request, multiplied by the factor below
+ * When a sync request is dispatched, the queue that contains that
+ * request, and all the ancestor entities of that queue, are charged
+ * with the number of sectors of the request. In constrast, if the
+ * request is async, then the queue and its ancestor entities are
+ * charged with the number of sectors of the request, multiplied by
+ * the factor below. This throttles the bandwidth for async I/O,
+ * w.r.t. to sync I/O, and it is done to counter the tendency of async
+ * writes to steal I/O throughput to reads.
+ *
+ * The current value of this parameter is the result of a tuning with
+ * several hardware and software configurations. We tried to find the
+ * lowest value for which writes do not cause noticeable problems to
+ * reads. In fact, the lower this parameter, the stabler I/O control,
+ * in the following respect. The lower this parameter is, the less
+ * the bandwidth enjoyed by a group decreases
+ * - when the group does writes, w.r.t. to when it does reads;
+ * - when other groups do reads, w.r.t. to when they do writes.
 */
-static const int bfq_async_charge_factor = 10;
+static const int bfq_async_charge_factor = 3;
 
 /* Default timeout values, in jiffies, approximating CFQ defaults. */
 const int bfq_timeout = HZ / 8;
@@ -853,16 +867,7 @@ static unsigned long bfq_serv_to_charge(struct request *rq,
 	if (bfq_bfqq_sync(bfqq) || bfqq->wr_coeff > 1)
 		return blk_rq_sectors(rq);
 
-	/*
-	 * If there are no weight-raised queues, then amplify service
-	 * by just the async charge factor; otherwise amplify service
-	 * by twice the async charge factor, to further reduce latency
-	 * for weight-raised queues.
-	 */
-	if (bfqq->bfqd->wr_busy_queues == 0)
-		return blk_rq_sectors(rq) * bfq_async_charge_factor;
-
-	return blk_rq_sectors(rq) * 2 * bfq_async_charge_factor;
+	return blk_rq_sectors(rq) * bfq_async_charge_factor;
 }
 
 /**
@@ -3298,6 +3303,27 @@ void bfq_bfqq_expire(struct bfq_data *bfqd,
		 */
	} else
		entity->service = 0;
+
+	/*
+	 * Reset the received-service counter for every parent entity.
+	 * Differently from what happens with bfqq->entity.service,
+	 * the resetting of this counter never needs to be postponed
+	 * for parent entities. In fact, in case bfqq may have a
+	 * chance to go on being served using the last, partially
+	 * consumed budget, bfqq->entity.service needs to be kept,
+	 * because if bfqq then actually goes on being served using
+	 * the same budget, the last value of bfqq->entity.service is
+	 * needed to properly decrement bfqq->entity.budget by the
+	 * portion already consumed. In contrast, it is not necessary
+	 * to keep entity->service for parent entities too, because
+	 * the bubble up of the new value of bfqq->entity.budget will
+	 * make sure that the budgets of parent entities are correct,
+	 * even in case bfqq and thus parent entities go on receiving
+	 * service with the same budget.
+	 */
+	entity = entity->parent;
+	for_each_entity(entity)
+		entity->service = 0;
 }
 
 /*
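
Taken together, the new comment and the simplified bfq_serv_to_charge() describe a single charging rule: sync and weight-raised queues are charged the real request size in sectors, async queues are charged that size times bfq_async_charge_factor (now a flat 3, with the extra doubling for weight-raised workloads gone). The sketch below only illustrates that rule; fake_request and fake_queue are hypothetical stand-ins for the kernel's struct request and struct bfq_queue.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical, simplified stand-ins for struct request / struct bfq_queue. */
struct fake_request { unsigned long sectors; bool sync; };
struct fake_queue   { unsigned int wr_coeff; };   /* >1 means weight-raised */

static const int async_charge_factor = 3;

/* Mirrors the shape of bfq_serv_to_charge() after this patch: sync and
 * weight-raised queues are charged their real size, async I/O is
 * overcharged by a flat factor. */
static unsigned long serv_to_charge(const struct fake_request *rq,
				    const struct fake_queue *q)
{
	if (rq->sync || q->wr_coeff > 1)
		return rq->sectors;
	return rq->sectors * async_charge_factor;
}

int main(void)
{
	struct fake_queue q = { .wr_coeff = 1 };
	struct fake_request sync_rq  = { .sectors = 8, .sync = true };
	struct fake_request async_rq = { .sectors = 8, .sync = false };

	printf("sync charge:  %lu\n", serv_to_charge(&sync_rq, &q));   /* 8  */
	printf("async charge: %lu\n", serv_to_charge(&async_rq, &q));  /* 24 */
	return 0;
}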

block/bfq-wf2q.c

Lines changed: 11 additions & 11 deletions
@@ -130,10 +130,14 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd,
 	if (!change_without_lookup) /* lookup needed */
 		next_in_service = bfq_lookup_next_entity(sd, expiration);
 
-	if (next_in_service)
-		parent_sched_may_change = !sd->next_in_service ||
+	if (next_in_service) {
+		bool new_budget_triggers_change =
 			bfq_update_parent_budget(next_in_service);
 
+		parent_sched_may_change = !sd->next_in_service ||
+			new_budget_triggers_change;
+	}
+
 	sd->next_in_service = next_in_service;
 
 	if (!next_in_service)
@@ -877,15 +881,11 @@ void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq,
			 unsigned long time_ms)
 {
 	struct bfq_entity *entity = &bfqq->entity;
-	int tot_serv_to_charge = entity->service;
-	unsigned int timeout_ms = jiffies_to_msecs(bfq_timeout);
-
-	if (time_ms > 0 && time_ms < timeout_ms)
-		tot_serv_to_charge =
-			(bfqd->bfq_max_budget * time_ms) / timeout_ms;
-
-	if (tot_serv_to_charge < entity->service)
-		tot_serv_to_charge = entity->service;
+	unsigned long timeout_ms = jiffies_to_msecs(bfq_timeout);
+	unsigned long bounded_time_ms = min(time_ms, timeout_ms);
+	int serv_to_charge_for_time =
+		(bfqd->bfq_max_budget * bounded_time_ms) / timeout_ms;
+	int tot_serv_to_charge = max(serv_to_charge_for_time, entity->service);
 
 	/* Increase budget to avoid inconsistencies */
 	if (tot_serv_to_charge > entity->budget)
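
The rewritten bfq_bfqq_charge_time() charges the queue as if it had consumed the fraction of the maximum budget corresponding to the time it held the device, with the time clamped to the budget timeout and the result never allowed to drop below the service already accounted. A small worked sketch of that arithmetic follows; the numbers are made up for illustration, not taken from a real bfq_data.

#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* Same arithmetic as the reworked bfq_bfqq_charge_time(), with plain
 * integers instead of bfq_data/bfq_queue: charge a time slice as the
 * corresponding fraction of the maximum budget, bounded by the timeout,
 * and never below the service already accounted. */
static int time_to_charge(int max_budget, unsigned long time_ms,
			  unsigned long timeout_ms, int already_served)
{
	unsigned long bounded_time_ms = MIN(time_ms, timeout_ms);
	int serv_for_time = (int)((max_budget * bounded_time_ms) / timeout_ms);

	return MAX(serv_for_time, already_served);
}

int main(void)
{
	/* Hypothetical values: 16384-sector max budget, 125 ms timeout. */
	printf("%d\n", time_to_charge(16384, 50, 125, 1000));  /* 6553 */
	printf("%d\n", time_to_charge(16384, 500, 125, 1000)); /* clamped: 16384 */
	printf("%d\n", time_to_charge(16384, 5, 125, 1000));   /* floor: 1000 */
	return 0;
}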

block/blk-core.c

Lines changed: 3 additions & 2 deletions
@@ -1036,7 +1036,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
		    laptop_mode_timer_fn, 0);
 	timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
 	INIT_WORK(&q->timeout_work, NULL);
-	INIT_LIST_HEAD(&q->queue_head);
 	INIT_LIST_HEAD(&q->timeout_list);
 	INIT_LIST_HEAD(&q->icq_list);
 #ifdef CONFIG_BLK_CGROUP
@@ -2162,7 +2161,9 @@ static inline bool should_fail_request(struct hd_struct *part,
 
 static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
 {
-	if (part->policy && op_is_write(bio_op(bio))) {
+	const int op = bio_op(bio);
+
+	if (part->policy && (op_is_write(op) && !op_is_flush(op))) {
 		char b[BDEVNAME_SIZE];
 
 		WARN_ONCE(1,
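
The new condition in bio_check_ro() only warns when the operation both writes data and is not a flush, because an empty flush is harmless on a read-only partition. A stand-alone sketch of that predicate is shown below; fake_op and the helpers are hypothetical stand-ins for the kernel's req_opf values and op_is_write()/op_is_flush().

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical operation encoding; the kernel uses enum req_opf plus
 * REQ_PREFLUSH/REQ_FUA bits instead. */
enum fake_op { OP_READ, OP_WRITE, OP_FLUSH };

static bool op_is_write(enum fake_op op) { return op != OP_READ; }
static bool op_is_flush(enum fake_op op) { return op == OP_FLUSH; }

/* Mirrors the fixed bio_check_ro() condition: warn only for real data
 * writes to a read-only partition, not for flushes. */
static bool should_warn_ro(bool part_read_only, enum fake_op op)
{
	return part_read_only && op_is_write(op) && !op_is_flush(op);
}

int main(void)
{
	printf("write on ro: %d\n", should_warn_ro(true, OP_WRITE)); /* 1 */
	printf("flush on ro: %d\n", should_warn_ro(true, OP_FLUSH)); /* 0 */
	printf("read on ro:  %d\n", should_warn_ro(true, OP_READ));  /* 0 */
	return 0;
}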

block/blk-mq-sched.c

Lines changed: 0 additions & 44 deletions
@@ -462,50 +462,6 @@ static void blk_mq_sched_tags_teardown(struct request_queue *q)
 		blk_mq_sched_free_tags(set, hctx, i);
 }
 
-int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
-			   unsigned int hctx_idx)
-{
-	struct elevator_queue *e = q->elevator;
-	int ret;
-
-	if (!e)
-		return 0;
-
-	ret = blk_mq_sched_alloc_tags(q, hctx, hctx_idx);
-	if (ret)
-		return ret;
-
-	if (e->type->ops.mq.init_hctx) {
-		ret = e->type->ops.mq.init_hctx(hctx, hctx_idx);
-		if (ret) {
-			blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
-			return ret;
-		}
-	}
-
-	blk_mq_debugfs_register_sched_hctx(q, hctx);
-
-	return 0;
-}
-
-void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
-			    unsigned int hctx_idx)
-{
-	struct elevator_queue *e = q->elevator;
-
-	if (!e)
-		return;
-
-	blk_mq_debugfs_unregister_sched_hctx(hctx);
-
-	if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
-		e->type->ops.mq.exit_hctx(hctx, hctx_idx);
-		hctx->sched_data = NULL;
-	}
-
-	blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
-}
-
 int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 {
 	struct blk_mq_hw_ctx *hctx;

block/blk-mq-sched.h

Lines changed: 0 additions & 5 deletions
@@ -28,11 +28,6 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
 int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e);
 void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e);
 
-int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
-			   unsigned int hctx_idx);
-void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
-			    unsigned int hctx_idx);
-
 static inline bool
 blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
 {

block/blk-mq-tag.c

Lines changed: 13 additions & 1 deletion
@@ -320,6 +320,18 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
+	/*
+	 * __blk_mq_update_nr_hw_queues will update the nr_hw_queues and
+	 * queue_hw_ctx after freeze the queue. So we could use q_usage_counter
+	 * to avoid race with it. __blk_mq_update_nr_hw_queues will users
+	 * synchronize_rcu to ensure all of the users go out of the critical
+	 * section below and see zeroed q_usage_counter.
+	 */
+	rcu_read_lock();
+	if (percpu_ref_is_zero(&q->q_usage_counter)) {
+		rcu_read_unlock();
+		return;
+	}
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		struct blk_mq_tags *tags = hctx->tags;
@@ -335,7 +347,7 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
 		bt_for_each(hctx, &tags->breserved_tags, fn, priv, true);
 		bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false);
 	}
-
+	rcu_read_unlock();
 }
 
 static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
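
The hunk above is the reader side of a freeze/synchronize pattern: blk_mq_queue_tag_busy_iter() enters an RCU read-side section and bails out if q_usage_counter has already dropped to zero, while __blk_mq_update_nr_hw_queues() freezes the queues and calls synchronize_rcu() before touching the hardware-queue map. The rough user-space sketch below keeps only the shape of that check; a single C11 atomic stands in for the percpu reference and RCU, so it illustrates the idea rather than the kernel primitives.

#include <stdatomic.h>
#include <stdio.h>

/* Very rough stand-in for the kernel pattern: percpu_ref + RCU are
 * replaced by one atomic counter, just to show the shape of the check
 * added to blk_mq_queue_tag_busy_iter(). */
struct fake_queue {
	atomic_int usage_counter;   /* 0 means "frozen for an update" */
	int nr_hw_queues;
};

static void busy_iter(struct fake_queue *q, void (*fn)(int hw_queue))
{
	/* In the kernel this is rcu_read_lock() +
	 * percpu_ref_is_zero(&q->q_usage_counter). */
	if (atomic_load(&q->usage_counter) == 0)
		return;                     /* updater owns the queue map */

	for (int i = 0; i < q->nr_hw_queues; i++)
		fn(i);                      /* safe: map cannot change here */
}

static void print_hw_queue(int i) { printf("hw queue %d\n", i); }

int main(void)
{
	struct fake_queue q = { .nr_hw_queues = 2 };

	atomic_store(&q.usage_counter, 1);
	busy_iter(&q, print_hw_queue);      /* iterates */

	atomic_store(&q.usage_counter, 0);  /* "frozen" */
	busy_iter(&q, print_hw_queue);      /* bails out */
	return 0;
}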

block/blk-mq.c

Lines changed: 88 additions & 8 deletions
@@ -2145,8 +2145,6 @@ static void blk_mq_exit_hctx(struct request_queue *q,
 	if (set->ops->exit_request)
 		set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx);
 
-	blk_mq_sched_exit_hctx(q, hctx, hctx_idx);
-
 	if (set->ops->exit_hctx)
 		set->ops->exit_hctx(hctx, hctx_idx);
 
@@ -2214,12 +2212,9 @@ static int blk_mq_init_hctx(struct request_queue *q,
	    set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
 		goto free_bitmap;
 
-	if (blk_mq_sched_init_hctx(q, hctx, hctx_idx))
-		goto exit_hctx;
-
 	hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size);
 	if (!hctx->fq)
-		goto sched_exit_hctx;
+		goto exit_hctx;
 
 	if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx, node))
 		goto free_fq;
@@ -2233,8 +2228,6 @@ static int blk_mq_init_hctx(struct request_queue *q,
 
  free_fq:
 	kfree(hctx->fq);
- sched_exit_hctx:
-	blk_mq_sched_exit_hctx(q, hctx, hctx_idx);
  exit_hctx:
 	if (set->ops->exit_hctx)
 		set->ops->exit_hctx(hctx, hctx_idx);
@@ -2896,10 +2889,81 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
 	return ret;
 }
 
+/*
+ * request_queue and elevator_type pair.
+ * It is just used by __blk_mq_update_nr_hw_queues to cache
+ * the elevator_type associated with a request_queue.
+ */
+struct blk_mq_qe_pair {
+	struct list_head node;
+	struct request_queue *q;
+	struct elevator_type *type;
+};
+
+/*
+ * Cache the elevator_type in qe pair list and switch the
+ * io scheduler to 'none'
+ */
+static bool blk_mq_elv_switch_none(struct list_head *head,
+		struct request_queue *q)
+{
+	struct blk_mq_qe_pair *qe;
+
+	if (!q->elevator)
+		return true;
+
+	qe = kmalloc(sizeof(*qe), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY);
+	if (!qe)
+		return false;
+
+	INIT_LIST_HEAD(&qe->node);
+	qe->q = q;
+	qe->type = q->elevator->type;
+	list_add(&qe->node, head);
+
+	mutex_lock(&q->sysfs_lock);
+	/*
+	 * After elevator_switch_mq, the previous elevator_queue will be
+	 * released by elevator_release. The reference of the io scheduler
+	 * module get by elevator_get will also be put. So we need to get
+	 * a reference of the io scheduler module here to prevent it to be
+	 * removed.
+	 */
+	__module_get(qe->type->elevator_owner);
+	elevator_switch_mq(q, NULL);
+	mutex_unlock(&q->sysfs_lock);
+
+	return true;
+}
+
+static void blk_mq_elv_switch_back(struct list_head *head,
+		struct request_queue *q)
+{
+	struct blk_mq_qe_pair *qe;
+	struct elevator_type *t = NULL;
+
+	list_for_each_entry(qe, head, node)
+		if (qe->q == q) {
+			t = qe->type;
+			break;
+		}
+
+	if (!t)
+		return;
+
+	list_del(&qe->node);
+	kfree(qe);
+
+	mutex_lock(&q->sysfs_lock);
+	elevator_switch_mq(q, t);
+	mutex_unlock(&q->sysfs_lock);
+}
+
 static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
					 int nr_hw_queues)
 {
 	struct request_queue *q;
+	LIST_HEAD(head);
 
 	lockdep_assert_held(&set->tag_list_lock);
 
@@ -2910,6 +2974,18 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 
 	list_for_each_entry(q, &set->tag_list, tag_set_list)
 		blk_mq_freeze_queue(q);
+	/*
+	 * Sync with blk_mq_queue_tag_busy_iter.
+	 */
+	synchronize_rcu();
+	/*
+	 * Switch IO scheduler to 'none', cleaning up the data associated
+	 * with the previous scheduler. We will switch back once we are done
+	 * updating the new sw to hw queue mappings.
+	 */
+	list_for_each_entry(q, &set->tag_list, tag_set_list)
+		if (!blk_mq_elv_switch_none(&head, q))
+			goto switch_back;
 
 	set->nr_hw_queues = nr_hw_queues;
 	blk_mq_update_queue_map(set);
@@ -2918,6 +2994,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 		blk_mq_queue_reinit(q);
 	}
 
+switch_back:
+	list_for_each_entry(q, &set->tag_list, tag_set_list)
+		blk_mq_elv_switch_back(&head, q);
+
 	list_for_each_entry(q, &set->tag_list, tag_set_list)
 		blk_mq_unfreeze_queue(q);
 }
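
blk_mq_elv_switch_none() and blk_mq_elv_switch_back() form a save-and-restore pair: before the hardware-queue count changes, each frozen queue's elevator type is stashed on a local list and the queue is switched to 'none'; once the new mapping is in place, the list is walked and the original scheduler is reinstated. The sketch below mirrors that pattern in plain user-space C with hypothetical fake_queue/qe_pair types; it deliberately leaves out the module reference counting and sysfs locking the kernel code needs.

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical miniature of the qe-pair bookkeeping used by
 * __blk_mq_update_nr_hw_queues(): remember each queue's scheduler,
 * switch to "none", do the disruptive update, then switch back. */
struct fake_queue { const char *name; const char *sched; };

struct qe_pair {
	struct fake_queue *q;
	const char *saved_sched;
	struct qe_pair *next;
};

static int switch_none(struct qe_pair **head, struct fake_queue *q)
{
	struct qe_pair *qe = malloc(sizeof(*qe));

	if (!qe)
		return -1;
	qe->q = q;
	qe->saved_sched = q->sched;   /* cache the current scheduler */
	qe->next = *head;
	*head = qe;
	q->sched = "none";
	return 0;
}

static void switch_back(struct qe_pair **head)
{
	while (*head) {
		struct qe_pair *qe = *head;

		*head = qe->next;
		qe->q->sched = qe->saved_sched;   /* restore */
		free(qe);
	}
}

int main(void)
{
	struct fake_queue q = { "sda", "bfq" };
	struct qe_pair *head = NULL;

	switch_none(&head, &q);
	printf("during update: %s\n", q.sched);  /* none */
	/* ... update the hardware queue mapping here ... */
	switch_back(&head);
	printf("after update:  %s\n", q.sched);  /* bfq  */
	return 0;
}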

block/blk-wbt.c

Lines changed: 1 addition & 5 deletions
@@ -576,12 +576,8 @@ static void wbt_wait(struct rq_qos *rqos, struct bio *bio, spinlock_t *lock)
 	struct rq_wb *rwb = RQWB(rqos);
 	enum wbt_flags flags;
 
-	if (!rwb_enabled(rwb))
-		return;
-
 	flags = bio_to_wbt_flags(rwb, bio);
-
-	if (!wbt_should_throttle(rwb, bio)) {
+	if (!(flags & WBT_TRACKED)) {
 		if (flags & WBT_READ)
 			wb_timestamp(rwb, &rwb->last_issue);
 		return;
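
After the fix, wbt_wait() decides everything from the flags returned by bio_to_wbt_flags(): only bios marked WBT_TRACKED are throttled, and an untracked read just records a timestamp. A tiny sketch of that flag-driven early return is shown below; the FAKE_* bit values are hypothetical stand-ins for the kernel's wbt_flags.

#include <stdio.h>

/* Hypothetical flag bits; the kernel defines the real ones in blk-wbt.h. */
#define FAKE_WBT_TRACKED (1 << 0)
#define FAKE_WBT_READ    (1 << 1)

/* Shape of the fixed wbt_wait(): the decision to throttle comes solely
 * from the flags the bio was tagged with, not from a separate
 * rwb_enabled()/wbt_should_throttle() pair that could disagree. */
static void fake_wbt_wait(unsigned int flags)
{
	if (!(flags & FAKE_WBT_TRACKED)) {
		if (flags & FAKE_WBT_READ)
			printf("untracked read: timestamp only\n");
		return;
	}
	printf("tracked bio: throttle\n");
}

int main(void)
{
	fake_wbt_wait(FAKE_WBT_READ);                    /* timestamp */
	fake_wbt_wait(FAKE_WBT_TRACKED);                 /* throttle  */
	fake_wbt_wait(FAKE_WBT_TRACKED | FAKE_WBT_READ); /* throttle  */
	return 0;
}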
