
Commit 9cb28da

xzpeter authored and akpm00 committed
mm/gup: handle hugetlb in the generic follow_page_mask code
Now follow_page() is ready to handle hugetlb pages in whatever form, and across all architectures. Switch to the generic code path.

Time to retire hugetlb_follow_page_mask(), following the earlier retirement of follow_hugetlb_page() in 4849807.

There may be a slight difference in how the loops run when processing slow GUP over a large hugetlb range on archs that support cont_pte/cont_pmd: with the patch applied, each loop of __get_user_pages() resolves one pgtable entry, rather than relying on the size of the hugetlb hstate, which may cover multiple entries in one loop.

A quick performance test on an aarch64 VM on an M1 chip shows a 15% degradation over a tight loop of slow gup after the path switch. That shouldn't be a problem, because slow gup should not be a hot path for GUP in general: when the page is commonly present, fast gup will already succeed, while when the page is indeed missing and requires a follow-up page fault, the slow-gup degradation will probably be buried in the fault paths anyway. It also explains why slow gup for THP used to be very slow before 57edfcf ("mm/gup: accelerate thp gup even for "pages != NULL"") landed; the latter is not part of this performance analysis, just a side benefit.

If performance becomes a concern, we can consider handling CONT_PTE in follow_page(). Until that is justified as necessary, keep everything clean and simple.

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Peter Xu <[email protected]>
Reviewed-by: Jason Gunthorpe <[email protected]>
Tested-by: Ryan Roberts <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Andrew Jones <[email protected]>
Cc: Aneesh Kumar K.V (IBM) <[email protected]>
Cc: Axel Rasmussen <[email protected]>
Cc: Christophe Leroy <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: James Houghton <[email protected]>
Cc: John Hubbard <[email protected]>
Cc: Kirill A. Shutemov <[email protected]>
Cc: Lorenzo Stoakes <[email protected]>
Cc: Matthew Wilcox (Oracle) <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: "Mike Rapoport (IBM)" <[email protected]>
Cc: Muchun Song <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Cc: Yang Shi <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
1 parent a12083d commit 9cb28da
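
Editor's note: to make the loop-granularity point in the commit message concrete, here is a small standalone illustration. The numbers are hypothetical (assuming arm64 with 4K base pages, where a CONT_PTE hugetlb page is 64K backed by 16 contiguous PTEs) and are not taken from the M1 test mentioned above; it is plain userspace arithmetic, not kernel code.

#include <stdio.h>

int main(void)
{
        /* Hypothetical arm64/4K numbers: 64K CONT_PTE hugetlb pages. */
        const unsigned long range     = 1UL << 30;   /* 1G range passed to slow GUP */
        const unsigned long hstate_sz = 64UL << 10;  /* old step: one whole hugetlb page */
        const unsigned long entry_sz  = 4UL << 10;   /* new step: one pgtable entry */

        /* Before: hugetlb_follow_page_mask() reported an hstate-sized page_mask. */
        printf("iterations before: %lu\n", range / hstate_sz);  /* 16384 */
        /* After: the generic PTE path resolves one entry per __get_user_pages() loop. */
        printf("iterations after:  %lu\n", range / entry_sz);   /* 262144 */
        return 0;
}

More iterations over the same range is what shows up as the measured slow-gup degradation; fast gup and the fault paths are unaffected.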

File tree: 3 files changed (+5, -88 lines)


include/linux/hugetlb.h

Lines changed: 0 additions & 7 deletions
@@ -328,13 +328,6 @@ static inline void hugetlb_zap_end(
 {
 }
 
-static inline struct page *hugetlb_follow_page_mask(
-                struct vm_area_struct *vma, unsigned long address, unsigned int flags,
-                unsigned int *page_mask)
-{
-        BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE*/
-}
-
 static inline int copy_hugetlb_page_range(struct mm_struct *dst,
                                           struct mm_struct *src,
                                           struct vm_area_struct *dst_vma,

mm/gup.c

Lines changed: 5 additions & 10 deletions
@@ -1132,18 +1132,11 @@ static struct page *follow_page_mask(struct vm_area_struct *vma,
 {
         pgd_t *pgd;
         struct mm_struct *mm = vma->vm_mm;
+        struct page *page;
 
-        ctx->page_mask = 0;
-
-        /*
-         * Call hugetlb_follow_page_mask for hugetlb vmas as it will use
-         * special hugetlb page table walking code. This eliminates the
-         * need to check for hugetlb entries in the general walking code.
-         */
-        if (is_vm_hugetlb_page(vma))
-                return hugetlb_follow_page_mask(vma, address, flags,
-                                                &ctx->page_mask);
+        vma_pgtable_walk_begin(vma);
 
+        ctx->page_mask = 0;
         pgd = pgd_offset(mm, address);
 
         if (unlikely(is_hugepd(__hugepd(pgd_val(*pgd)))))
@@ -1154,6 +1147,8 @@ static struct page *follow_page_mask(struct vm_area_struct *vma,
         else
                 page = follow_p4d_mask(vma, address, pgd, flags, ctx);
 
+        vma_pgtable_walk_end(vma);
+
         return page;
 }
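
Editor's note: for context on the vma_pgtable_walk_begin()/vma_pgtable_walk_end() calls added above: they appear to come from the parent commit a12083d, and for hugetlb VMAs they take and drop the hugetlb VMA read lock that hugetlb_follow_page_mask() used to take internally (see the removal in mm/hugetlb.c below). A sketch of those helpers, paraphrased from that commit and not part of this diff:

/* Sketch of the mm/memory.c helpers from parent commit a12083d; treat as approximate. */
void vma_pgtable_walk_begin(struct vm_area_struct *vma)
{
        if (is_vm_hugetlb_page(vma))
                hugetlb_vma_lock_read(vma);
}

void vma_pgtable_walk_end(struct vm_area_struct *vma)
{
        if (is_vm_hugetlb_page(vma))
                hugetlb_vma_unlock_read(vma);
}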

mm/hugetlb.c

Lines changed: 0 additions & 71 deletions
@@ -6876,77 +6876,6 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
 }
 #endif /* CONFIG_USERFAULTFD */
 
-struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
-                                      unsigned long address, unsigned int flags,
-                                      unsigned int *page_mask)
-{
-        struct hstate *h = hstate_vma(vma);
-        struct mm_struct *mm = vma->vm_mm;
-        unsigned long haddr = address & huge_page_mask(h);
-        struct page *page = NULL;
-        spinlock_t *ptl;
-        pte_t *pte, entry;
-        int ret;
-
-        hugetlb_vma_lock_read(vma);
-        pte = hugetlb_walk(vma, haddr, huge_page_size(h));
-        if (!pte)
-                goto out_unlock;
-
-        ptl = huge_pte_lock(h, mm, pte);
-        entry = huge_ptep_get(pte);
-        if (pte_present(entry)) {
-                page = pte_page(entry);
-
-                if (!huge_pte_write(entry)) {
-                        if (flags & FOLL_WRITE) {
-                                page = NULL;
-                                goto out;
-                        }
-
-                        if (gup_must_unshare(vma, flags, page)) {
-                                /* Tell the caller to do unsharing */
-                                page = ERR_PTR(-EMLINK);
-                                goto out;
-                        }
-                }
-
-                page = nth_page(page, ((address & ~huge_page_mask(h)) >> PAGE_SHIFT));
-
-                /*
-                 * Note that page may be a sub-page, and with vmemmap
-                 * optimizations the page struct may be read only.
-                 * try_grab_page() will increase the ref count on the
-                 * head page, so this will be OK.
-                 *
-                 * try_grab_page() should always be able to get the page here,
-                 * because we hold the ptl lock and have verified pte_present().
-                 */
-                ret = try_grab_page(page, flags);
-
-                if (WARN_ON_ONCE(ret)) {
-                        page = ERR_PTR(ret);
-                        goto out;
-                }
-
-                *page_mask = (1U << huge_page_order(h)) - 1;
-        }
-out:
-        spin_unlock(ptl);
-out_unlock:
-        hugetlb_vma_unlock_read(vma);
-
-        /*
-         * Fixup retval for dump requests: if pagecache doesn't exist,
-         * don't try to allocate a new page but just skip it.
-         */
-        if (!page && (flags & FOLL_DUMP) &&
-            !hugetlbfs_pagecache_present(h, vma, address))
-                page = ERR_PTR(-EFAULT);
-
-        return page;
-}
-
 long hugetlb_change_protection(struct vm_area_struct *vma,
                 unsigned long address, unsigned long end,
                 pgprot_t newprot, unsigned long cp_flags)
