Skip to content

Commit ffcc393

Browse files
committed
dm: enhance internal suspend and resume interface
Rename dm_internal_{suspend,resume} to dm_internal_{suspend,resume}_fast -- dm-stats will continue using these methods to avoid all the extra suspend/resume logic that is not needed in order to quickly flush IO.

Introduce dm_internal_suspend_noflush() variant that actually calls the mapped_device's target callbacks -- otherwise target-specific hooks are avoided (e.g. dm-thin's thin_presuspend and thin_postsuspend). Common code between dm_internal_{suspend_noflush,resume} and dm_{suspend,resume} was factored out as __dm_{suspend,resume}.

Update dm_internal_{suspend_noflush,resume} to always take and release the mapped_device's suspend_lock. Also update dm_{suspend,resume} to be aware of the potential for DM_INTERNAL_SUSPEND_FLAG to be set and respond accordingly by interruptibly waiting for the DM_INTERNAL_SUSPEND_FLAG to be cleared. Add lockdep annotation to dm_suspend() and dm_resume().

The existing DM_SUSPEND_FLAG remains unchanged. DM_INTERNAL_SUSPEND_FLAG is set by dm_internal_suspend_noflush() and cleared by dm_internal_resume().

Both DM_SUSPEND_FLAG and DM_INTERNAL_SUSPEND_FLAG may be set if a device was already suspended when dm_internal_suspend_noflush() was called -- this can be thought of as a "nested suspend". A "nested suspend" can occur with legacy userspace dm-thin code that might suspend all active thin volumes before suspending the pool for resize.

But otherwise, in the normal dm-thin-pool suspend case moving forward: the thin-pool will have DM_SUSPEND_FLAG set and all active thins from that thin-pool will have DM_INTERNAL_SUSPEND_FLAG set.

Also add DM_INTERNAL_SUSPEND_FLAG to status report. This new DM_INTERNAL_SUSPEND_FLAG state is being reported to assist with debugging (e.g. 'dmsetup info' will report an internally suspended device accordingly).

Signed-off-by: Mike Snitzer <[email protected]>
Acked-by: Joe Thornber <[email protected]>
1 parent 80e96c5 commit ffcc393

File tree

5 files changed

+192
-58
lines changed

5 files changed

+192
-58
lines changed

drivers/md/dm-ioctl.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -684,11 +684,14 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param)
684684
int srcu_idx;
685685

686686
param->flags &= ~(DM_SUSPEND_FLAG | DM_READONLY_FLAG |
687-
DM_ACTIVE_PRESENT_FLAG);
687+
DM_ACTIVE_PRESENT_FLAG | DM_INTERNAL_SUSPEND_FLAG);
688688

689689
if (dm_suspended_md(md))
690690
param->flags |= DM_SUSPEND_FLAG;
691691

692+
if (dm_suspended_internally_md(md))
693+
param->flags |= DM_INTERNAL_SUSPEND_FLAG;
694+
692695
if (dm_test_deferred_remove_flag(md))
693696
param->flags |= DM_DEFERRED_REMOVE;
694697

drivers/md/dm-stats.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -824,7 +824,7 @@ static int message_stats_create(struct mapped_device *md,
824824
return 1;
825825

826826
id = dm_stats_create(dm_get_stats(md), start, end, step, program_id, aux_data,
827-
dm_internal_suspend, dm_internal_resume, md);
827+
dm_internal_suspend_fast, dm_internal_resume_fast, md);
828828
if (id < 0)
829829
return id;
830830

drivers/md/dm.c

Lines changed: 173 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include <linux/idr.h>
2020
#include <linux/hdreg.h>
2121
#include <linux/delay.h>
22+
#include <linux/wait.h>
2223

2324
#include <trace/events/block.h>
2425

@@ -117,6 +118,7 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
117118
#define DMF_NOFLUSH_SUSPENDING 5
118119
#define DMF_MERGE_IS_OPTIONAL 6
119120
#define DMF_DEFERRED_REMOVE 7
121+
#define DMF_SUSPENDED_INTERNALLY 8
120122

121123
/*
122124
* A dummy definition to make RCU happy.
@@ -2718,36 +2720,18 @@ static void unlock_fs(struct mapped_device *md)
27182720
}
27192721

27202722
/*
2721-
* We need to be able to change a mapping table under a mounted
2722-
* filesystem. For example we might want to move some data in
2723-
* the background. Before the table can be swapped with
2724-
* dm_bind_table, dm_suspend must be called to flush any in
2725-
* flight bios and ensure that any further io gets deferred.
2726-
*/
2727-
/*
2728-
* Suspend mechanism in request-based dm.
2729-
*
2730-
* 1. Flush all I/Os by lock_fs() if needed.
2731-
* 2. Stop dispatching any I/O by stopping the request_queue.
2732-
* 3. Wait for all in-flight I/Os to be completed or requeued.
2723+
* If __dm_suspend returns 0, the device is completely quiescent
2724+
* now. There is no request-processing activity. All new requests
2725+
* are being added to md->deferred list.
27332726
*
2734-
* To abort suspend, start the request_queue.
2727+
* Caller must hold md->suspend_lock
27352728
*/
2736-
int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
2729+
static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
2730+
unsigned suspend_flags, int interruptible)
27372731
{
2738-
struct dm_table *map = NULL;
2739-
int r = 0;
2740-
int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
2741-
int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
2742-
2743-
mutex_lock(&md->suspend_lock);
2744-
2745-
if (dm_suspended_md(md)) {
2746-
r = -EINVAL;
2747-
goto out_unlock;
2748-
}
2749-
2750-
map = rcu_dereference(md->map);
2732+
bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG;
2733+
bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG;
2734+
int r;
27512735

27522736
/*
27532737
* DMF_NOFLUSH_SUSPENDING must be set before presuspend.
@@ -2772,7 +2756,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
27722756
r = lock_fs(md);
27732757
if (r) {
27742758
dm_table_presuspend_undo_targets(map);
2775-
goto out_unlock;
2759+
return r;
27762760
}
27772761
}
27782762

@@ -2806,7 +2790,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
28062790
* We call dm_wait_for_completion to wait for all existing requests
28072791
* to finish.
28082792
*/
2809-
r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE);
2793+
r = dm_wait_for_completion(md, interruptible);
28102794

28112795
if (noflush)
28122796
clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
@@ -2822,14 +2806,55 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
28222806

28232807
unlock_fs(md);
28242808
dm_table_presuspend_undo_targets(map);
2825-
goto out_unlock; /* pushback list is already flushed, so skip flush */
2809+
/* pushback list is already flushed, so skip flush */
28262810
}
28272811

2828-
/*
2829-
* If dm_wait_for_completion returned 0, the device is completely
2830-
* quiescent now. There is no request-processing activity. All new
2831-
* requests are being added to md->deferred list.
2832-
*/
2812+
return r;
2813+
}
2814+
2815+
/*
2816+
* We need to be able to change a mapping table under a mounted
2817+
* filesystem. For example we might want to move some data in
2818+
* the background. Before the table can be swapped with
2819+
* dm_bind_table, dm_suspend must be called to flush any in
2820+
* flight bios and ensure that any further io gets deferred.
2821+
*/
2822+
/*
2823+
* Suspend mechanism in request-based dm.
2824+
*
2825+
* 1. Flush all I/Os by lock_fs() if needed.
2826+
* 2. Stop dispatching any I/O by stopping the request_queue.
2827+
* 3. Wait for all in-flight I/Os to be completed or requeued.
2828+
*
2829+
* To abort suspend, start the request_queue.
2830+
*/
2831+
int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
2832+
{
2833+
struct dm_table *map = NULL;
2834+
int r = 0;
2835+
2836+
retry:
2837+
mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
2838+
2839+
if (dm_suspended_md(md)) {
2840+
r = -EINVAL;
2841+
goto out_unlock;
2842+
}
2843+
2844+
if (dm_suspended_internally_md(md)) {
2845+
/* already internally suspended, wait for internal resume */
2846+
mutex_unlock(&md->suspend_lock);
2847+
r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
2848+
if (r)
2849+
return r;
2850+
goto retry;
2851+
}
2852+
2853+
map = rcu_dereference(md->map);
2854+
2855+
r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE);
2856+
if (r)
2857+
goto out_unlock;
28332858

28342859
set_bit(DMF_SUSPENDED, &md->flags);
28352860

@@ -2840,35 +2865,57 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
28402865
return r;
28412866
}
28422867

2868+
/*
 * Common resume path shared by dm_resume() and __dm_internal_resume().
 *
 * @md:  the mapped device to resume.
 * @map: table whose targets should be resumed, or NULL to skip the
 *       dm_table_resume_targets() step entirely.
 *
 * Returns 0 on success, or the non-zero value returned by
 * dm_table_resume_targets(); in the error case nothing else in this
 * function has run yet.
 *
 * This helper does not touch DMF_SUSPENDED or DMF_SUSPENDED_INTERNALLY;
 * the callers clear the relevant flag themselves after it returns.
 * Both callers visible in this change invoke it with md->suspend_lock held.
 */
static int __dm_resume(struct mapped_device *md, struct dm_table *map)
2869+
{
2870+
if (map) {
2871+
int r = dm_table_resume_targets(map);
2872+
if (r)
2873+
return r;
2874+
}
2875+
2876+
dm_queue_flush(md);
2877+
2878+
/*
2879+
* Flushing deferred I/Os must be done after targets are resumed
2880+
* so that mapping of targets can work correctly.
2881+
* Request-based dm is queueing the deferred I/Os in its request_queue.
2882+
*/
2883+
if (dm_request_based(md))
2884+
start_queue(md->queue);
2885+
2886+
unlock_fs(md);
2887+
2888+
return 0;
2889+
}
2890+
28432891
int dm_resume(struct mapped_device *md)
28442892
{
28452893
int r = -EINVAL;
28462894
struct dm_table *map = NULL;
28472895

2848-
mutex_lock(&md->suspend_lock);
2896+
retry:
2897+
mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
2898+
28492899
if (!dm_suspended_md(md))
28502900
goto out;
28512901

2902+
if (dm_suspended_internally_md(md)) {
2903+
/* already internally suspended, wait for internal resume */
2904+
mutex_unlock(&md->suspend_lock);
2905+
r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
2906+
if (r)
2907+
return r;
2908+
goto retry;
2909+
}
2910+
28522911
map = rcu_dereference(md->map);
28532912
if (!map || !dm_table_get_size(map))
28542913
goto out;
28552914

2856-
r = dm_table_resume_targets(map);
2915+
r = __dm_resume(md, map);
28572916
if (r)
28582917
goto out;
28592918

2860-
dm_queue_flush(md);
2861-
2862-
/*
2863-
* Flushing deferred I/Os must be done after targets are resumed
2864-
* so that mapping of targets can work correctly.
2865-
* Request-based dm is queueing the deferred I/Os in its request_queue.
2866-
*/
2867-
if (dm_request_based(md))
2868-
start_queue(md->queue);
2869-
2870-
unlock_fs(md);
2871-
28722919
clear_bit(DMF_SUSPENDED, &md->flags);
28732920

28742921
r = 0;
@@ -2882,15 +2929,80 @@ int dm_resume(struct mapped_device *md)
28822929
* Internal suspend/resume works like userspace-driven suspend. It waits
28832930
* until all bios finish and prevents issuing new bios to the target drivers.
28842931
* It may be used only from the kernel.
2885-
*
2886-
* Internal suspend holds md->suspend_lock, which prevents interaction with
2887-
* userspace-driven suspend.
28882932
*/
28892933

2890-
void dm_internal_suspend(struct mapped_device *md)
2934+
/*
 * Core of dm_internal_suspend_noflush(); the caller takes and releases
 * md->suspend_lock around this call.
 *
 * @md:            the mapped device to suspend internally.
 * @suspend_flags: DM_SUSPEND_* flags forwarded unchanged to __dm_suspend().
 *
 * Three cases are handled:
 *  - DMF_SUSPENDED_INTERNALLY already set: nothing to do.
 *  - DMF_SUSPENDED already set (device suspended via dm_suspend()): only
 *    DMF_SUSPENDED_INTERNALLY is set on top, giving a "nested suspend".
 *  - otherwise: run __dm_suspend() uninterruptibly, set
 *    DMF_SUSPENDED_INTERNALLY and call dm_table_postsuspend_targets().
 *
 * NOTE(review): in this diff rendering the mutex_lock() line marked
 * "2892-" is a removed line of the old dm_internal_suspend(); it is not
 * part of the new function body.
 *
 * NOTE(review): the return value of __dm_suspend() is discarded and
 * DMF_SUSPENDED_INTERNALLY is set unconditionally afterwards; the
 * in-body comment gives the rationale (only the NOFLUSH, uninterruptible
 * variant is supported) -- confirm no failure path needs handling.
 */
static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_flags)
28912935
{
2892-
mutex_lock(&md->suspend_lock);
2936+
struct dm_table *map = NULL;
2937+
2938+
if (dm_suspended_internally_md(md))
2939+
return; /* nested internal suspend */
2940+
2941+
if (dm_suspended_md(md)) {
2942+
set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
2943+
return; /* nested suspend: DMF_SUSPENDED is already set */
2944+
}
2945+
2946+
/*
 * NOTE(review): rcu_dereference() is used here with no rcu_read_lock()
 * visible in this function or its caller; md->suspend_lock is held
 * instead. Consider whether rcu_dereference_protected() with a
 * lockdep_is_held(&md->suspend_lock) condition is the intended
 * accessor -- confirm against lockdep/RCU debugging output.
 */
map = rcu_dereference(md->map);
2947+
2948+
/*
2949+
* Using TASK_UNINTERRUPTIBLE because only NOFLUSH internal suspend is
2950+
* supported. Properly supporting a TASK_INTERRUPTIBLE internal suspend
2951+
* would require changing .presuspend to return an error -- avoid this
2952+
* until there is a need for more elaborate variants of internal suspend.
2953+
*/
2954+
(void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE);
2955+
2956+
set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
2957+
2958+
dm_table_postsuspend_targets(map);
2959+
}
2960+
2961+
/*
 * Core of dm_internal_resume(); the caller takes and releases
 * md->suspend_lock around this call.
 *
 * @md: the mapped device to resume from an internal suspend.
 *
 *  - DMF_SUSPENDED_INTERNALLY not set: nothing to do.
 *  - DMF_SUSPENDED also set (nested suspend): skip the actual resume and
 *    only clear DMF_SUSPENDED_INTERNALLY, leaving DMF_SUSPENDED in place.
 *  - otherwise: call __dm_resume() with a NULL map, which skips
 *    dm_table_resume_targets(), then clear DMF_SUSPENDED_INTERNALLY.
 *
 * After clearing the bit, waiters sleeping in wait_on_bit() on
 * DMF_SUSPENDED_INTERNALLY (dm_suspend() and dm_resume() in this change)
 * are woken with wake_up_bit().
 */
static void __dm_internal_resume(struct mapped_device *md)
2962+
{
2963+
if (!dm_suspended_internally_md(md))
2964+
return; /* resume from nested internal suspend */
2965+
28932966
if (dm_suspended_md(md))
2967+
goto done; /* resume from nested suspend */
2968+
2969+
/*
2970+
* NOTE: existing callers don't need to call dm_table_resume_targets
2971+
* (which may fail -- so best to avoid it for now by passing NULL map)
2972+
*/
2973+
(void) __dm_resume(md, NULL);
2974+
2975+
done:
2976+
clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
2977+
/* barrier between the clear_bit() above and the wake_up_bit() below */
smp_mb__after_atomic();
2978+
wake_up_bit(&md->flags, DMF_SUSPENDED_INTERNALLY);
2979+
}
2980+
2981+
/*
 * Internally suspend @md using the NOFLUSH variant of suspend.
 *
 * Takes md->suspend_lock, calls __dm_internal_suspend() with
 * DM_SUSPEND_NOFLUSH_FLAG, and drops the lock again before returning.
 * Returns nothing. Paired with dm_internal_resume().
 * Exported (GPL-only) for use by other modules.
 */
void dm_internal_suspend_noflush(struct mapped_device *md)
2982+
{
2983+
mutex_lock(&md->suspend_lock);
2984+
__dm_internal_suspend(md, DM_SUSPEND_NOFLUSH_FLAG);
2985+
mutex_unlock(&md->suspend_lock);
2986+
}
2987+
EXPORT_SYMBOL_GPL(dm_internal_suspend_noflush);
2988+
2989+
/*
 * Resume @md from an internal suspend.
 *
 * Takes md->suspend_lock, calls __dm_internal_resume(), and drops the
 * lock again before returning. Returns nothing; __dm_internal_resume()
 * is a no-op when the device is not internally suspended.
 * Exported (GPL-only) for use by other modules.
 */
void dm_internal_resume(struct mapped_device *md)
2990+
{
2991+
mutex_lock(&md->suspend_lock);
2992+
__dm_internal_resume(md);
2993+
mutex_unlock(&md->suspend_lock);
2994+
}
2995+
EXPORT_SYMBOL_GPL(dm_internal_resume);
2996+
2997+
/*
2998+
* Fast variants of internal suspend/resume hold md->suspend_lock,
2999+
* which prevents interaction with userspace-driven suspend.
3000+
*/
3001+
3002+
void dm_internal_suspend_fast(struct mapped_device *md)
3003+
{
3004+
mutex_lock(&md->suspend_lock);
3005+
if (dm_suspended_md(md) || dm_suspended_internally_md(md))
28943006
return;
28953007

28963008
set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
@@ -2899,9 +3011,9 @@ void dm_internal_suspend(struct mapped_device *md)
28993011
dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
29003012
}
29013013

2902-
void dm_internal_resume(struct mapped_device *md)
3014+
void dm_internal_resume_fast(struct mapped_device *md)
29033015
{
2904-
if (dm_suspended_md(md))
3016+
if (dm_suspended_md(md) || dm_suspended_internally_md(md))
29053017
goto done;
29063018

29073019
dm_queue_flush(md);
@@ -2987,6 +3099,11 @@ int dm_suspended_md(struct mapped_device *md)
29873099
return test_bit(DMF_SUSPENDED, &md->flags);
29883100
}
29893101

3102+
/*
 * Report whether @md is currently internally suspended.
 *
 * Returns the result of test_bit() on DMF_SUSPENDED_INTERNALLY in
 * md->flags: non-zero if the bit is set, 0 otherwise. No lock is taken
 * here.
 */
int dm_suspended_internally_md(struct mapped_device *md)
3103+
{
3104+
return test_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
3105+
}
3106+
29903107
int dm_test_deferred_remove_flag(struct mapped_device *md)
29913108
{
29923109
return test_bit(DMF_DEFERRED_REMOVE, &md->flags);

drivers/md/dm.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,15 @@ int dm_deleting_md(struct mapped_device *md);
129129
*/
130130
int dm_suspended_md(struct mapped_device *md);
131131

132+
/*
133+
* Internal suspend and resume methods.
134+
*/
135+
int dm_suspended_internally_md(struct mapped_device *md);
136+
void dm_internal_suspend_fast(struct mapped_device *md);
137+
void dm_internal_resume_fast(struct mapped_device *md);
138+
void dm_internal_suspend_noflush(struct mapped_device *md);
139+
void dm_internal_resume(struct mapped_device *md);
140+
132141
/*
133142
* Test if the device is scheduled for deferred remove.
134143
*/

include/uapi/linux/dm-ioctl.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,4 +352,9 @@ enum {
352352
*/
353353
#define DM_DEFERRED_REMOVE (1 << 17) /* In/Out */
354354

355+
/*
356+
* If set, the device is suspended internally.
357+
*/
358+
#define DM_INTERNAL_SUSPEND_FLAG (1 << 18) /* Out */
359+
355360
#endif /* _LINUX_DM_IOCTL_H */

0 commit comments

Comments
 (0)