Skip to content

Commit e30b5dc

Browse files
Yan, Zheng authored and
tytso committed
ext4: fix fio regression
We (Linux Kernel Performance project) found a regression introduced by commit:

    f7fec03 ext4: track all extent status in extent status tree

The commit causes about a 20% performance decrease in the fio random write test. The profiler shows that rb_next() uses a lot of CPU time. The call stack is:

    rb_next
    ext4_es_find_delayed_extent
    ext4_map_blocks
    _ext4_get_block
    ext4_get_block_write
    __blockdev_direct_IO
    ext4_direct_IO
    generic_file_direct_write
    __generic_file_aio_write
    ext4_file_write
    aio_rw_vect_retry
    aio_run_iocb
    do_io_submit
    sys_io_submit
    system_call_fastpath
    io_submit
    td_io_getevents
    io_u_queued_complete
    thread_main
    main
    __libc_start_main

The cause is that ext4_es_find_delayed_extent() doesn't have an upper bound; it keeps searching until a delayed extent is found. When there are a lot of non-delayed entries in the extent status tree, ext4_es_find_delayed_extent() may use a lot of CPU time.

Reported-by: LKP project <[email protected]>
Signed-off-by: Yan, Zheng <[email protected]>
Signed-off-by: Zheng Liu <[email protected]>
Cc: "Theodore Ts'o" <[email protected]>
1 parent 0d606e2 commit e30b5dc

File tree

5 files changed

+23
-14
lines changed

5 files changed

+23
-14
lines changed

fs/ext4/extents.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3642,7 +3642,7 @@ int ext4_find_delalloc_range(struct inode *inode,
36423642
{
36433643
struct extent_status es;
36443644

3645-
ext4_es_find_delayed_extent(inode, lblk_start, &es);
3645+
ext4_es_find_delayed_extent_range(inode, lblk_start, lblk_end, &es);
36463646
if (es.es_len == 0)
36473647
return 0; /* there is no delay extent in this tree */
36483648
else if (es.es_lblk <= lblk_start &&
@@ -4608,9 +4608,10 @@ static int ext4_find_delayed_extent(struct inode *inode,
46084608
struct extent_status es;
46094609
ext4_lblk_t block, next_del;
46104610

4611-
ext4_es_find_delayed_extent(inode, newes->es_lblk, &es);
4612-
46134611
if (newes->es_pblk == 0) {
4612+
ext4_es_find_delayed_extent_range(inode, newes->es_lblk,
4613+
newes->es_lblk + newes->es_len - 1, &es);
4614+
46144615
/*
46154616
* No extent in extent-tree contains block @newes->es_pblk,
46164617
* then the block may stay in 1)a hole or 2)delayed-extent.
@@ -4630,7 +4631,7 @@ static int ext4_find_delayed_extent(struct inode *inode,
46304631
}
46314632

46324633
block = newes->es_lblk + newes->es_len;
4633-
ext4_es_find_delayed_extent(inode, block, &es);
4634+
ext4_es_find_delayed_extent_range(inode, block, EXT_MAX_BLOCKS, &es);
46344635
if (es.es_len == 0)
46354636
next_del = EXT_MAX_BLOCKS;
46364637
else

fs/ext4/extents_status.c

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -232,22 +232,25 @@ static struct extent_status *__es_tree_search(struct rb_root *root,
232232
}
233233

234234
/*
235-
* ext4_es_find_delayed_extent: find the 1st delayed extent covering @es->lblk
236-
* if it exists, otherwise, the next extent after @es->lblk.
235+
* ext4_es_find_delayed_extent_range: find the 1st delayed extent covering
236+
* @es->lblk if it exists, otherwise, the next extent after @es->lblk.
237237
*
238238
* @inode: the inode which owns delayed extents
239239
* @lblk: the offset where we start to search
240+
* @end: the offset where we stop to search
240241
* @es: delayed extent that we found
241242
*/
242-
void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
243+
void ext4_es_find_delayed_extent_range(struct inode *inode,
244+
ext4_lblk_t lblk, ext4_lblk_t end,
243245
struct extent_status *es)
244246
{
245247
struct ext4_es_tree *tree = NULL;
246248
struct extent_status *es1 = NULL;
247249
struct rb_node *node;
248250

249251
BUG_ON(es == NULL);
250-
trace_ext4_es_find_delayed_extent_enter(inode, lblk);
252+
BUG_ON(end < lblk);
253+
trace_ext4_es_find_delayed_extent_range_enter(inode, lblk);
251254

252255
read_lock(&EXT4_I(inode)->i_es_lock);
253256
tree = &EXT4_I(inode)->i_es_tree;
@@ -270,6 +273,10 @@ void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
270273
if (es1 && !ext4_es_is_delayed(es1)) {
271274
while ((node = rb_next(&es1->rb_node)) != NULL) {
272275
es1 = rb_entry(node, struct extent_status, rb_node);
276+
if (es1->es_lblk > end) {
277+
es1 = NULL;
278+
break;
279+
}
273280
if (ext4_es_is_delayed(es1))
274281
break;
275282
}
@@ -285,7 +292,7 @@ void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
285292
read_unlock(&EXT4_I(inode)->i_es_lock);
286293

287294
ext4_es_lru_add(inode);
288-
trace_ext4_es_find_delayed_extent_exit(inode, es);
295+
trace_ext4_es_find_delayed_extent_range_exit(inode, es);
289296
}
290297

291298
static struct extent_status *

fs/ext4/extents_status.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,8 @@ extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
6262
unsigned long long status);
6363
extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
6464
ext4_lblk_t len);
65-
extern void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
65+
extern void ext4_es_find_delayed_extent_range(struct inode *inode,
66+
ext4_lblk_t lblk, ext4_lblk_t end,
6667
struct extent_status *es);
6768
extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
6869
struct extent_status *es);

fs/ext4/file.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
464464
* If there is a delay extent at this offset,
465465
* it will be as a data.
466466
*/
467-
ext4_es_find_delayed_extent(inode, last, &es);
467+
ext4_es_find_delayed_extent_range(inode, last, last, &es);
468468
if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
469469
if (last != start)
470470
dataoff = last << blkbits;
@@ -547,7 +547,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
547547
* If there is a delay extent at this offset,
548548
* we will skip this extent.
549549
*/
550-
ext4_es_find_delayed_extent(inode, last, &es);
550+
ext4_es_find_delayed_extent_range(inode, last, last, &es);
551551
if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
552552
last = es.es_lblk + es.es_len;
553553
holeoff = last << blkbits;

include/trace/events/ext4.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2139,7 +2139,7 @@ TRACE_EVENT(ext4_es_remove_extent,
21392139
__entry->lblk, __entry->len)
21402140
);
21412141

2142-
TRACE_EVENT(ext4_es_find_delayed_extent_enter,
2142+
TRACE_EVENT(ext4_es_find_delayed_extent_range_enter,
21432143
TP_PROTO(struct inode *inode, ext4_lblk_t lblk),
21442144

21452145
TP_ARGS(inode, lblk),
@@ -2161,7 +2161,7 @@ TRACE_EVENT(ext4_es_find_delayed_extent_enter,
21612161
(unsigned long) __entry->ino, __entry->lblk)
21622162
);
21632163

2164-
TRACE_EVENT(ext4_es_find_delayed_extent_exit,
2164+
TRACE_EVENT(ext4_es_find_delayed_extent_range_exit,
21652165
TP_PROTO(struct inode *inode, struct extent_status *es),
21662166

21672167
TP_ARGS(inode, es),

0 commit comments

Comments
 (0)