Skip to content

Commit 6b4517a

Browse files
htejunJens Axboe
authored andcommitted
block: implement bd_claiming and claiming block
Currently, device claiming for exclusive open is done after low level open - disk->fops->open() - has completed successfully. This means that exclusive open attempts while a device is already exclusively open will fail only after disk->fops->open() is called. cdrom driver issues commands during open() which means that O_EXCL open attempt can unintentionally inject commands to in-progress command stream for burning thus disturbing burning process. In most cases, this doesn't cause problems because the first command to be issued is TUR which most devices can process in the middle of burning. However, depending on how a device replies to TUR during burning, cdrom driver may end up issuing further commands. This can't be resolved trivially by moving bd_claim() before doing actual open() because that means an open attempt which will end up failing could interfere other legit O_EXCL open attempts. ie. unconfirmed open attempts can fail others. This patch resolves the problem by introducing claiming block which is started by bd_start_claiming() and terminated either by bd_claim() or bd_abort_claiming(). bd_claim() from inside a claiming block is guaranteed to succeed and once a claiming block is started, other bd_start_claiming() or bd_claim() attempts block till the current claiming block is terminated. bd_claim() can still be used standalone although now it always synchronizes against claiming blocks, so the existing users will keep working without any change. blkdev_open() and open_bdev_exclusive() are converted to use claiming blocks so that exclusive open attempts from these functions don't interfere with the existing exclusive open. This problem was discovered while investigating bko#15403. https://bugzilla.kernel.org/show_bug.cgi?id=15403 The burning problem itself can be resolved by updating userspace probing tools to always open w/ O_EXCL. Signed-off-by: Tejun Heo <[email protected]> Reported-by: Matthias-Christian Ott <[email protected]> Cc: Kay Sievers <[email protected]> Signed-off-by: Jens Axboe <[email protected]>
1 parent 1a3cbbc commit 6b4517a

File tree

2 files changed

+175
-24
lines changed

2 files changed

+175
-24
lines changed

fs/block_dev.c

Lines changed: 174 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -693,24 +693,160 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
693693
return true; /* is a partition of an un-held device */
694694
}
695695

696+
/**
697+
* bd_prepare_to_claim - prepare to claim a block device
698+
* @bdev: block device of interest
699+
* @whole: the whole device containing @bdev, may equal @bdev
700+
* @holder: holder trying to claim @bdev
701+
*
702+
* Prepare to claim @bdev. This function fails if @bdev is already
703+
* claimed by another holder and waits if another claiming is in
704+
* progress. This function doesn't actually claim. On successful
705+
* return, the caller has ownership of bd_claiming and bd_holder[s].
706+
*
707+
* CONTEXT:
708+
* spin_lock(&bdev_lock). Might release bdev_lock, sleep and regrab
709+
* it multiple times.
710+
*
711+
* RETURNS:
712+
* 0 if @bdev can be claimed, -EBUSY otherwise.
713+
*/
714+
static int bd_prepare_to_claim(struct block_device *bdev,
715+
struct block_device *whole, void *holder)
716+
{
717+
retry:
718+
/* if someone else claimed, fail */
719+
if (!bd_may_claim(bdev, whole, holder))
720+
return -EBUSY;
721+
722+
/* if someone else is claiming, wait for it to finish */
723+
if (whole->bd_claiming && whole->bd_claiming != holder) {
724+
wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
725+
DEFINE_WAIT(wait);
726+
727+
prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
728+
spin_unlock(&bdev_lock);
729+
schedule();
730+
finish_wait(wq, &wait);
731+
spin_lock(&bdev_lock);
732+
goto retry;
733+
}
734+
735+
/* yay, all mine */
736+
return 0;
737+
}
738+
739+
/**
740+
* bd_start_claiming - start claiming a block device
741+
* @bdev: block device of interest
742+
* @holder: holder trying to claim @bdev
743+
*
744+
* @bdev is about to be opened exclusively. Check @bdev can be opened
745+
* exclusively and mark that an exclusive open is in progress. Each
746+
* successful call to this function must be matched with a call to
747+
* either bd_claim() or bd_abort_claiming(). If this function
748+
* succeeds, the matching bd_claim() is guaranteed to succeed.
749+
*
750+
* CONTEXT:
751+
* Might sleep.
752+
*
753+
* RETURNS:
754+
* Pointer to the block device containing @bdev on success, ERR_PTR()
755+
* value on failure.
756+
*/
757+
static struct block_device *bd_start_claiming(struct block_device *bdev,
758+
void *holder)
759+
{
760+
struct gendisk *disk;
761+
struct block_device *whole;
762+
int partno, err;
763+
764+
might_sleep();
765+
766+
/*
767+
* @bdev might not have been initialized properly yet, look up
768+
* and grab the outer block device the hard way.
769+
*/
770+
disk = get_gendisk(bdev->bd_dev, &partno);
771+
if (!disk)
772+
return ERR_PTR(-ENXIO);
773+
774+
whole = bdget_disk(disk, 0);
775+
put_disk(disk);
776+
if (!whole)
777+
return ERR_PTR(-ENOMEM);
778+
779+
/* prepare to claim, if successful, mark claiming in progress */
780+
spin_lock(&bdev_lock);
781+
782+
err = bd_prepare_to_claim(bdev, whole, holder);
783+
if (err == 0) {
784+
whole->bd_claiming = holder;
785+
spin_unlock(&bdev_lock);
786+
return whole;
787+
} else {
788+
spin_unlock(&bdev_lock);
789+
bdput(whole);
790+
return ERR_PTR(err);
791+
}
792+
}
793+
794+
/* releases bdev_lock */
795+
static void __bd_abort_claiming(struct block_device *whole, void *holder)
796+
{
797+
BUG_ON(whole->bd_claiming != holder);
798+
whole->bd_claiming = NULL;
799+
wake_up_bit(&whole->bd_claiming, 0);
800+
801+
spin_unlock(&bdev_lock);
802+
bdput(whole);
803+
}
804+
805+
/**
806+
* bd_abort_claiming - abort claiming a block device
807+
* @whole: whole block device returned by bd_start_claiming()
808+
* @holder: holder trying to claim @bdev
809+
*
810+
* Abort a claiming block started by bd_start_claiming(). Note that
811+
* @whole is not the block device to be claimed but the whole device
812+
* returned by bd_start_claiming().
813+
*
814+
* CONTEXT:
815+
* Grabs and releases bdev_lock.
816+
*/
817+
static void bd_abort_claiming(struct block_device *whole, void *holder)
818+
{
819+
spin_lock(&bdev_lock);
820+
__bd_abort_claiming(whole, holder); /* releases bdev_lock */
821+
}
822+
696823
/**
697824
* bd_claim - claim a block device
698825
* @bdev: block device to claim
699826
* @holder: holder trying to claim @bdev
700827
*
701-
* Try to claim @bdev.
828+
* Try to claim @bdev which must have been opened successfully. This
829+
* function may be called with or without preceding
830+
* blk_start_claiming(). In the former case, this function is always
831+
* successful and terminates the claiming block.
832+
*
833+
* CONTEXT:
834+
* Might sleep.
702835
*
703836
* RETURNS:
704837
* 0 if successful, -EBUSY if @bdev is already claimed.
705838
*/
706839
int bd_claim(struct block_device *bdev, void *holder)
707840
{
708841
struct block_device *whole = bdev->bd_contains;
709-
int res = -EBUSY;
842+
int res;
843+
844+
might_sleep();
710845

711846
spin_lock(&bdev_lock);
712847

713-
if (bd_may_claim(bdev, whole, holder)) {
848+
res = bd_prepare_to_claim(bdev, whole, holder);
849+
if (res == 0) {
714850
/* note that for a whole device bd_holders
715851
* will be incremented twice, and bd_holder will
716852
* be set to bd_claim before being set to holder
@@ -719,10 +855,13 @@ int bd_claim(struct block_device *bdev, void *holder)
719855
whole->bd_holder = bd_claim;
720856
bdev->bd_holders++;
721857
bdev->bd_holder = holder;
722-
res = 0;
723858
}
724859

725-
spin_unlock(&bdev_lock);
860+
if (whole->bd_claiming)
861+
__bd_abort_claiming(whole, holder); /* releases bdev_lock */
862+
else
863+
spin_unlock(&bdev_lock);
864+
726865
return res;
727866
}
728867
EXPORT_SYMBOL(bd_claim);
@@ -1338,6 +1477,7 @@ EXPORT_SYMBOL(blkdev_get);
13381477

13391478
static int blkdev_open(struct inode * inode, struct file * filp)
13401479
{
1480+
struct block_device *whole = NULL;
13411481
struct block_device *bdev;
13421482
int res;
13431483

@@ -1360,22 +1500,25 @@ static int blkdev_open(struct inode * inode, struct file * filp)
13601500
if (bdev == NULL)
13611501
return -ENOMEM;
13621502

1503+
if (filp->f_mode & FMODE_EXCL) {
1504+
whole = bd_start_claiming(bdev, filp);
1505+
if (IS_ERR(whole)) {
1506+
bdput(bdev);
1507+
return PTR_ERR(whole);
1508+
}
1509+
}
1510+
13631511
filp->f_mapping = bdev->bd_inode->i_mapping;
13641512

13651513
res = blkdev_get(bdev, filp->f_mode);
1366-
if (res)
1367-
return res;
13681514

1369-
if (filp->f_mode & FMODE_EXCL) {
1370-
res = bd_claim(bdev, filp);
1371-
if (res)
1372-
goto out_blkdev_put;
1515+
if (whole) {
1516+
if (res == 0)
1517+
BUG_ON(bd_claim(bdev, filp) != 0);
1518+
else
1519+
bd_abort_claiming(whole, filp);
13731520
}
13741521

1375-
return 0;
1376-
1377-
out_blkdev_put:
1378-
blkdev_put(bdev, filp->f_mode);
13791522
return res;
13801523
}
13811524

@@ -1586,27 +1729,34 @@ EXPORT_SYMBOL(lookup_bdev);
15861729
*/
15871730
struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder)
15881731
{
1589-
struct block_device *bdev;
1590-
int error = 0;
1732+
struct block_device *bdev, *whole;
1733+
int error;
15911734

15921735
bdev = lookup_bdev(path);
15931736
if (IS_ERR(bdev))
15941737
return bdev;
15951738

1739+
whole = bd_start_claiming(bdev, holder);
1740+
if (IS_ERR(whole)) {
1741+
bdput(bdev);
1742+
return whole;
1743+
}
1744+
15961745
error = blkdev_get(bdev, mode);
15971746
if (error)
1598-
return ERR_PTR(error);
1747+
goto out_abort_claiming;
1748+
15991749
error = -EACCES;
16001750
if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
1601-
goto blkdev_put;
1602-
error = bd_claim(bdev, holder);
1603-
if (error)
1604-
goto blkdev_put;
1751+
goto out_blkdev_put;
16051752

1753+
BUG_ON(bd_claim(bdev, holder) != 0);
16061754
return bdev;
1607-
1608-
blkdev_put:
1755+
1756+
out_blkdev_put:
16091757
blkdev_put(bdev, mode);
1758+
out_abort_claiming:
1759+
bd_abort_claiming(whole, holder);
16101760
return ERR_PTR(error);
16111761
}
16121762

include/linux/fs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,7 @@ struct block_device {
651651
int bd_openers;
652652
struct mutex bd_mutex; /* open/close mutex */
653653
struct list_head bd_inodes;
654+
void * bd_claiming;
654655
void * bd_holder;
655656
int bd_holders;
656657
#ifdef CONFIG_SYSFS

0 commit comments

Comments
 (0)