Skip to content

Commit 6426c7a

Browse files
Qu Wenruo authored and kdave committed
btrfs: qgroup: Fix qgroup accounting when creating snapshot
Current btrfs qgroup design implies a requirement that after calling btrfs_qgroup_account_extents() there must be a commit root switch.

Normally this is OK, as btrfs_qgroup_account_extents() is only called inside btrfs_commit_transaction(), just before commit_cowonly_roots().

However, there is an exception at create_pending_snapshot(), which calls btrfs_qgroup_account_extents() without any commit root switch.

In case of creating a snapshot whose parent root is itself (create a snapshot of fs tree), it will corrupt qgroup by the following trace:
(skipped unrelated data)
======
btrfs_qgroup_account_extent: bytenr = 29786112, num_bytes = 16384, nr_old_roots = 0, nr_new_roots = 1
qgroup_update_counters: qgid = 5, cur_old_count = 0, cur_new_count = 1, rfer = 0, excl = 0
qgroup_update_counters: qgid = 5, cur_old_count = 0, cur_new_count = 1, rfer = 16384, excl = 16384
btrfs_qgroup_account_extent: bytenr = 29786112, num_bytes = 16384, nr_old_roots = 0, nr_new_roots = 0
======

The problem here is that in the first qgroup_account_extent(), the nr_new_roots of the extent is 1, which means its reference got increased, and qgroup increased its rfer and excl.

But at the second qgroup_account_extent(), its reference got decreased, and between these two qgroup_account_extent() calls there is no commit root switch. This leads to the same nr_old_roots, and this extent just got ignored by qgroup, which means this extent is wrongly accounted.

Fix it by calling commit_cowonly_roots() after qgroup_account_extent() in create_pending_snapshot(), with the needed preparation.

Mark: I added a check at the top of qgroup_account_snapshot() to skip this code if qgroups are turned off. xfstest btrfs/122 exposes this problem.

Signed-off-by: Qu Wenruo <[email protected]>
Reviewed-by: Josef Bacik <[email protected]>
Signed-off-by: Mark Fasheh <[email protected]>
Signed-off-by: David Sterba <[email protected]>
1 parent 72928f2 commit 6426c7a

File tree

1 file changed

+105
-24
lines changed

1 file changed

+105
-24
lines changed

fs/btrfs/transaction.c

Lines changed: 105 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -311,10 +311,11 @@ static noinline int join_transaction(struct btrfs_root *root, unsigned int type)
311311
* when the transaction commits
312312
*/
313313
static int record_root_in_trans(struct btrfs_trans_handle *trans,
314-
struct btrfs_root *root)
314+
struct btrfs_root *root,
315+
int force)
315316
{
316-
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
317-
root->last_trans < trans->transid) {
317+
if ((test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
318+
root->last_trans < trans->transid) || force) {
318319
WARN_ON(root == root->fs_info->extent_root);
319320
WARN_ON(root->commit_root != root->node);
320321

@@ -331,7 +332,7 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans,
331332
smp_wmb();
332333

333334
spin_lock(&root->fs_info->fs_roots_radix_lock);
334-
if (root->last_trans == trans->transid) {
335+
if (root->last_trans == trans->transid && !force) {
335336
spin_unlock(&root->fs_info->fs_roots_radix_lock);
336337
return 0;
337338
}
@@ -402,7 +403,7 @@ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
402403
return 0;
403404

404405
mutex_lock(&root->fs_info->reloc_mutex);
405-
record_root_in_trans(trans, root);
406+
record_root_in_trans(trans, root, 0);
406407
mutex_unlock(&root->fs_info->reloc_mutex);
407408

408409
return 0;
@@ -1310,6 +1311,92 @@ int btrfs_defrag_root(struct btrfs_root *root)
13101311
return ret;
13111312
}
13121313

1314+
/*
1315+
* Do all special snapshot related qgroup dirty hack.
1316+
*
1317+
* Will do all needed qgroup inherit and dirty hack like switch commit
1318+
* roots inside one transaction and write all btree into disk, to make
1319+
* qgroup works.
1320+
*/
1321+
static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
1322+
struct btrfs_root *src,
1323+
struct btrfs_root *parent,
1324+
struct btrfs_qgroup_inherit *inherit,
1325+
u64 dst_objectid)
1326+
{
1327+
struct btrfs_fs_info *fs_info = src->fs_info;
1328+
int ret;
1329+
1330+
/*
1331+
* Save some performance in the case that qgroups are not
1332+
* enabled. If this check races with the ioctl, rescan will
1333+
* kick in anyway.
1334+
*/
1335+
mutex_lock(&fs_info->qgroup_ioctl_lock);
1336+
if (!fs_info->quota_enabled) {
1337+
mutex_unlock(&fs_info->qgroup_ioctl_lock);
1338+
return 0;
1339+
}
1340+
mutex_unlock(&fs_info->qgroup_ioctl_lock);
1341+
1342+
/*
1343+
* We are going to commit transaction, see btrfs_commit_transaction()
1344+
* comment for reason locking tree_log_mutex
1345+
*/
1346+
mutex_lock(&fs_info->tree_log_mutex);
1347+
1348+
ret = commit_fs_roots(trans, src);
1349+
if (ret)
1350+
goto out;
1351+
ret = btrfs_qgroup_prepare_account_extents(trans, fs_info);
1352+
if (ret < 0)
1353+
goto out;
1354+
ret = btrfs_qgroup_account_extents(trans, fs_info);
1355+
if (ret < 0)
1356+
goto out;
1357+
1358+
/* Now qgroup are all updated, we can inherit it to new qgroups */
1359+
ret = btrfs_qgroup_inherit(trans, fs_info,
1360+
src->root_key.objectid, dst_objectid,
1361+
inherit);
1362+
if (ret < 0)
1363+
goto out;
1364+
1365+
/*
1366+
* Now we do a simplified commit transaction, which will:
1367+
* 1) commit all subvolume and extent tree
1368+
* To ensure all subvolume and extent tree have a valid
1369+
* commit_root to accounting later insert_dir_item()
1370+
* 2) write all btree blocks onto disk
1371+
* This is to make sure later btree modification will be cowed
1372+
* Or commit_root can be populated and cause wrong qgroup numbers
1373+
* In this simplified commit, we don't really care about other trees
1374+
* like chunk and root tree, as they won't affect qgroup.
1375+
* And we don't write super to avoid half committed status.
1376+
*/
1377+
ret = commit_cowonly_roots(trans, src);
1378+
if (ret)
1379+
goto out;
1380+
switch_commit_roots(trans->transaction, fs_info);
1381+
ret = btrfs_write_and_wait_transaction(trans, src);
1382+
if (ret)
1383+
btrfs_std_error(fs_info, ret,
1384+
"Error while writing out transaction for qgroup");
1385+
1386+
out:
1387+
mutex_unlock(&fs_info->tree_log_mutex);
1388+
1389+
/*
1390+
* Force parent root to be updated, as we recorded it before so its
1391+
* last_trans == cur_transid.
1392+
* Or it won't be committed again onto disk after later
1393+
* insert_dir_item()
1394+
*/
1395+
if (!ret)
1396+
record_root_in_trans(trans, parent, 1);
1397+
return ret;
1398+
}
1399+
13131400
/*
13141401
* new snapshots need to be created at a very specific time in the
13151402
* transaction commit. This does the actual creation.
@@ -1383,7 +1470,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
13831470
dentry = pending->dentry;
13841471
parent_inode = pending->dir;
13851472
parent_root = BTRFS_I(parent_inode)->root;
1386-
record_root_in_trans(trans, parent_root);
1473+
record_root_in_trans(trans, parent_root, 0);
13871474

13881475
cur_time = current_fs_time(parent_inode->i_sb);
13891476

@@ -1420,7 +1507,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
14201507
goto fail;
14211508
}
14221509

1423-
record_root_in_trans(trans, root);
1510+
record_root_in_trans(trans, root, 0);
14241511
btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
14251512
memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
14261513
btrfs_check_and_init_root_item(new_root_item);
@@ -1516,6 +1603,17 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
15161603
goto fail;
15171604
}
15181605

1606+
/*
1607+
* Do special qgroup accounting for snapshot, as we do some qgroup
1608+
* snapshot hack to do fast snapshot.
1609+
* To co-operate with that hack, we do hack again.
1610+
* Or snapshot will be greatly slowed down by a subtree qgroup rescan
1611+
*/
1612+
ret = qgroup_account_snapshot(trans, root, parent_root,
1613+
pending->inherit, objectid);
1614+
if (ret < 0)
1615+
goto fail;
1616+
15191617
ret = btrfs_insert_dir_item(trans, parent_root,
15201618
dentry->d_name.name, dentry->d_name.len,
15211619
parent_inode, &key,
@@ -1559,23 +1657,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
15591657
goto fail;
15601658
}
15611659

1562-
/*
1563-
* account qgroup counters before qgroup_inherit()
1564-
*/
1565-
ret = btrfs_qgroup_prepare_account_extents(trans, fs_info);
1566-
if (ret)
1567-
goto fail;
1568-
ret = btrfs_qgroup_account_extents(trans, fs_info);
1569-
if (ret)
1570-
goto fail;
1571-
ret = btrfs_qgroup_inherit(trans, fs_info,
1572-
root->root_key.objectid,
1573-
objectid, pending->inherit);
1574-
if (ret) {
1575-
btrfs_abort_transaction(trans, root, ret);
1576-
goto fail;
1577-
}
1578-
15791660
fail:
15801661
pending->error = ret;
15811662
dir_item_existed:

0 commit comments

Comments
 (0)