Skip to content

Commit 897ab3e

Browse files
rppt authored and torvalds committed
userfaultfd: non-cooperative: add event for memory unmaps
When a non-cooperative userfaultfd monitor copies pages in the background, it may encounter regions that were already unmapped. The addition of UFFD_EVENT_UNMAP allows the uffd monitor to precisely track changes in the virtual memory layout. Since there might be different uffd contexts for the affected VMAs, we should first create a temporary representation of the unmap event for each uffd context and then deliver these events one by one to the appropriate userfault file descriptors. The event notification occurs after the mmap_sem has been released. [[email protected]: fix nommu build] Link: http://lkml.kernel.org/r/[email protected] [[email protected]: fix nommu build] Link: http://lkml.kernel.org/r/[email protected] Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Mike Rapoport <[email protected]> Signed-off-by: Michal Hocko <[email protected]> Signed-off-by: Arnd Bergmann <[email protected]> Acked-by: Hillf Danton <[email protected]> Cc: Andrea Arcangeli <[email protected]> Cc: "Dr. David Alan Gilbert" <[email protected]> Cc: Mike Kravetz <[email protected]> Cc: Pavel Emelyanov <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 846b1a0 commit 897ab3e

File tree

15 files changed

+160
-45
lines changed

15 files changed

+160
-45
lines changed

arch/mips/kernel/vdso.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
111111
base = mmap_region(NULL, STACK_TOP, PAGE_SIZE,
112112
VM_READ|VM_WRITE|VM_EXEC|
113113
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
114-
0);
114+
0, NULL);
115115
if (IS_ERR_VALUE(base)) {
116116
ret = base;
117117
goto out;

arch/tile/mm/elf.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
143143
unsigned long addr = MEM_USER_INTRPT;
144144
addr = mmap_region(NULL, addr, INTRPT_SIZE,
145145
VM_READ|VM_EXEC|
146-
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 0);
146+
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 0, NULL);
147147
if (addr > (unsigned long) -PAGE_SIZE)
148148
retval = (int) addr;
149149
}

arch/x86/entry/vdso/vma.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr)
186186

187187
if (IS_ERR(vma)) {
188188
ret = PTR_ERR(vma);
189-
do_munmap(mm, text_start, image->size);
189+
do_munmap(mm, text_start, image->size, NULL);
190190
} else {
191191
current->mm->context.vdso = (void __user *)text_start;
192192
current->mm->context.vdso_image = image;

arch/x86/mm/mpx.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ static unsigned long mpx_mmap(unsigned long len)
5151

5252
down_write(&mm->mmap_sem);
5353
addr = do_mmap(NULL, 0, len, PROT_READ | PROT_WRITE,
54-
MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate);
54+
MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate, NULL);
5555
up_write(&mm->mmap_sem);
5656
if (populate)
5757
mm_populate(addr, populate);
@@ -893,7 +893,7 @@ static int unmap_entire_bt(struct mm_struct *mm,
893893
* avoid recursion, do_munmap() will check whether it comes
894894
* from one bounds table through VM_MPX flag.
895895
*/
896-
return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm));
896+
return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm), NULL);
897897
}
898898

899899
static int try_unmap_single_bt(struct mm_struct *mm,

fs/aio.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -512,7 +512,7 @@ static int aio_setup_ring(struct kioctx *ctx)
512512

513513
ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size,
514514
PROT_READ | PROT_WRITE,
515-
MAP_SHARED, 0, &unused);
515+
MAP_SHARED, 0, &unused, NULL);
516516
up_write(&mm->mmap_sem);
517517
if (IS_ERR((void *)ctx->mmap_base)) {
518518
ctx->mmap_size = 0;

fs/proc/vmcore.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,7 @@ static int remap_oldmem_pfn_checked(struct vm_area_struct *vma,
388388
}
389389
return 0;
390390
fail:
391-
do_munmap(vma->vm_mm, from, len);
391+
do_munmap(vma->vm_mm, from, len, NULL);
392392
return -EAGAIN;
393393
}
394394

@@ -481,7 +481,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
481481

482482
return 0;
483483
fail:
484-
do_munmap(vma->vm_mm, vma->vm_start, len);
484+
do_munmap(vma->vm_mm, vma->vm_start, len, NULL);
485485
return -EAGAIN;
486486
}
487487
#else

fs/userfaultfd.c

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,13 @@ struct userfaultfd_fork_ctx {
7171
struct list_head list;
7272
};
7373

74+
/*
 * One pending UFFD_EVENT_UNMAP notification.
 *
 * userfaultfd_unmap_prep() allocates one of these per distinct
 * (ctx, start, end) triple and queues it on a caller-supplied list;
 * userfaultfd_unmap_complete() later turns each entry into an event
 * message and frees it.
 */
struct userfaultfd_unmap_ctx {
75+
/* context to notify; prep takes a reference via userfaultfd_ctx_get() */
struct userfaultfd_ctx *ctx;
76+
/* range reported in the event (copied into msg.arg.remove.start/end) */
unsigned long start;
77+
unsigned long end;
78+
/* link in the caller's list of pending unmap events */
struct list_head list;
79+
};
80+
7481
struct userfaultfd_wait_queue {
7582
struct uffd_msg msg;
7683
wait_queue_t wq;
@@ -709,6 +716,64 @@ void userfaultfd_remove(struct vm_area_struct *vma,
709716
down_read(&mm->mmap_sem);
710717
}
711718

719+
/*
 * Check whether an unmap event for this exact (ctx, start, end) triple is
 * already queued on @unmaps.
 *
 * Used by userfaultfd_unmap_prep() so that several VMAs sharing the same
 * userfaultfd context produce a single entry for a given range.
 *
 * Returns true if a matching entry exists, false otherwise.
 */
static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps,
720+
unsigned long start, unsigned long end)
721+
{
722+
struct userfaultfd_unmap_ctx *unmap_ctx;
723+
724+
/* linear scan; all three fields must match */
list_for_each_entry(unmap_ctx, unmaps, list)
725+
if (unmap_ctx->ctx == ctx && unmap_ctx->start == start &&
726+
unmap_ctx->end == end)
727+
return true;
728+
729+
return false;
730+
}
731+
732+
/*
 * Queue pending UFFD_EVENT_UNMAP notifications for the range [start, end).
 *
 * Walks the VMA chain starting at @vma (following vm_next) while
 * vma->vm_start < end. For every VMA whose userfaultfd context has
 * UFFD_FEATURE_EVENT_UNMAP enabled, one struct userfaultfd_unmap_ctx is
 * appended to @unmaps, unless an entry for the same context and range is
 * already queued. Each queued entry holds a reference on its context,
 * taken with userfaultfd_ctx_get().
 *
 * Returns 0 on success or -ENOMEM if an entry could not be allocated.
 *
 * NOTE(review): on -ENOMEM the entries queued so far stay on @unmaps with
 * their context references held; presumably the caller still passes the
 * list to userfaultfd_unmap_complete() -- confirm against the callers.
 */
int userfaultfd_unmap_prep(struct vm_area_struct *vma,
733+
unsigned long start, unsigned long end,
734+
struct list_head *unmaps)
735+
{
736+
for ( ; vma && vma->vm_start < end; vma = vma->vm_next) {
737+
struct userfaultfd_unmap_ctx *unmap_ctx;
738+
struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
739+
740+
/*
 * Skip VMAs without a uffd context, contexts that did not enable
 * unmap events, and contexts already queued for this range.
 */
if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) ||
741+
has_unmap_ctx(ctx, unmaps, start, end))
742+
continue;
743+
744+
unmap_ctx = kzalloc(sizeof(*unmap_ctx), GFP_KERNEL);
745+
if (!unmap_ctx)
746+
return -ENOMEM;
747+
748+
/* the queued entry keeps its own reference on the context */
userfaultfd_ctx_get(ctx);
749+
unmap_ctx->ctx = ctx;
750+
/* record the whole requested range, not this VMA's own bounds */
unmap_ctx->start = start;
751+
unmap_ctx->end = end;
752+
list_add_tail(&unmap_ctx->list, unmaps);
753+
}
754+
755+
return 0;
756+
}
757+
758+
/*
 * Deliver and release every pending unmap event queued on @uf.
 *
 * For each entry, a UFFD_EVENT_UNMAP message carrying the entry's start
 * and end is built and handed to userfaultfd_event_wait_completion() for
 * the entry's context; the entry is then unlinked and freed. Per the
 * commit description, this notification happens after mmap_sem has been
 * released.
 *
 * @mm is not used by this function body.
 *
 * NOTE(review): the context reference taken in userfaultfd_unmap_prep() is
 * not dropped here; presumably userfaultfd_event_wait_completion() does
 * that -- the helper is not visible in this hunk, confirm.
 */
void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf)
759+
{
760+
struct userfaultfd_unmap_ctx *ctx, *n;
761+
struct userfaultfd_wait_queue ewq;
762+
763+
/* _safe variant: the current entry is deleted inside the loop */
list_for_each_entry_safe(ctx, n, uf, list) {
764+
msg_init(&ewq.msg);
765+
766+
ewq.msg.event = UFFD_EVENT_UNMAP;
767+
/* the unmap event uses the 'remove' member of the message argument */
ewq.msg.arg.remove.start = ctx->start;
768+
ewq.msg.arg.remove.end = ctx->end;
769+
770+
userfaultfd_event_wait_completion(ctx->ctx, &ewq);
771+
772+
list_del(&ctx->list);
773+
kfree(ctx);
774+
}
775+
}
776+
712777
static int userfaultfd_release(struct inode *inode, struct file *file)
713778
{
714779
struct userfaultfd_ctx *ctx = file->private_data;

include/linux/mm.h

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2090,18 +2090,22 @@ extern int install_special_mapping(struct mm_struct *mm,
20902090
extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
20912091

20922092
extern unsigned long mmap_region(struct file *file, unsigned long addr,
2093-
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff);
2093+
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
2094+
struct list_head *uf);
20942095
extern unsigned long do_mmap(struct file *file, unsigned long addr,
20952096
unsigned long len, unsigned long prot, unsigned long flags,
2096-
vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate);
2097-
extern int do_munmap(struct mm_struct *, unsigned long, size_t);
2097+
vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate,
2098+
struct list_head *uf);
2099+
extern int do_munmap(struct mm_struct *, unsigned long, size_t,
2100+
struct list_head *uf);
20982101

20992102
static inline unsigned long
21002103
do_mmap_pgoff(struct file *file, unsigned long addr,
21012104
unsigned long len, unsigned long prot, unsigned long flags,
2102-
unsigned long pgoff, unsigned long *populate)
2105+
unsigned long pgoff, unsigned long *populate,
2106+
struct list_head *uf)
21032107
{
2104-
return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate);
2108+
return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate, uf);
21052109
}
21062110

21072111
#ifdef CONFIG_MMU

include/linux/userfaultfd_k.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,12 @@ extern void userfaultfd_remove(struct vm_area_struct *vma,
6666
unsigned long start,
6767
unsigned long end);
6868

69+
extern int userfaultfd_unmap_prep(struct vm_area_struct *vma,
70+
unsigned long start, unsigned long end,
71+
struct list_head *uf);
72+
extern void userfaultfd_unmap_complete(struct mm_struct *mm,
73+
struct list_head *uf);
74+
6975
#else /* CONFIG_USERFAULTFD */
7076

7177
/* mm helpers */
@@ -118,6 +124,18 @@ static inline void userfaultfd_remove(struct vm_area_struct *vma,
118124
unsigned long end)
119125
{
120126
}
127+
128+
/*
 * Stub used when CONFIG_USERFAULTFD is not set: there are no unmap events
 * to queue, so this always reports success and leaves @uf untouched.
 */
static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma,
129+
unsigned long start, unsigned long end,
130+
struct list_head *uf)
131+
{
132+
return 0;
133+
}
134+
135+
/*
 * Stub used when CONFIG_USERFAULTFD is not set: nothing was queued, so
 * there is nothing to deliver or free.
 */
static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
136+
struct list_head *uf)
137+
{
138+
}
121139
#endif /* CONFIG_USERFAULTFD */
122140

123141
#endif /* _LINUX_USERFAULTFD_K_H */

include/uapi/linux/userfaultfd.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK | \
2222
UFFD_FEATURE_EVENT_REMAP | \
2323
UFFD_FEATURE_EVENT_REMOVE | \
24+
UFFD_FEATURE_EVENT_UNMAP | \
2425
UFFD_FEATURE_MISSING_HUGETLBFS | \
2526
UFFD_FEATURE_MISSING_SHMEM)
2627
#define UFFD_API_IOCTLS \
@@ -110,6 +111,7 @@ struct uffd_msg {
110111
#define UFFD_EVENT_FORK 0x13
111112
#define UFFD_EVENT_REMAP 0x14
112113
#define UFFD_EVENT_REMOVE 0x15
114+
#define UFFD_EVENT_UNMAP 0x16
113115

114116
/* flags for UFFD_EVENT_PAGEFAULT */
115117
#define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */
@@ -158,6 +160,7 @@ struct uffdio_api {
158160
#define UFFD_FEATURE_EVENT_REMOVE (1<<3)
159161
#define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4)
160162
#define UFFD_FEATURE_MISSING_SHMEM (1<<5)
163+
#define UFFD_FEATURE_EVENT_UNMAP (1<<6)
161164
__u64 features;
162165

163166
__u64 ioctls;

ipc/shm.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1222,7 +1222,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
12221222
goto invalid;
12231223
}
12241224

1225-
addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate);
1225+
addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate, NULL);
12261226
*raddr = addr;
12271227
err = 0;
12281228
if (IS_ERR_VALUE(addr))
@@ -1329,7 +1329,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
13291329
*/
13301330
file = vma->vm_file;
13311331
size = i_size_read(file_inode(vma->vm_file));
1332-
do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
1332+
do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
13331333
/*
13341334
* We discovered the size of the shm segment, so
13351335
* break out of here and fall through to the next
@@ -1356,7 +1356,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
13561356
if ((vma->vm_ops == &shm_vm_ops) &&
13571357
((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) &&
13581358
(vma->vm_file == file))
1359-
do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
1359+
do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
13601360
vma = next;
13611361
}
13621362

@@ -1365,7 +1365,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
13651365
* given
13661366
*/
13671367
if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
1368-
do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
1368+
do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
13691369
retval = 0;
13701370
}
13711371

0 commit comments

Comments
 (0)