3030
3131#include "internal.h"
3232
/*
 * Context handed to the page-table walk through walk->private and unpacked
 * by madvise_cold_or_pageout_pte_range().
 */
33+ struct madvise_walk_private {
34+ struct mmu_gather * tlb ; /* gather state the callback uses for its tlb_* calls */
35+ bool pageout ; /* true: isolate pages and reclaim_pages() them; false: deactivate_page() */
36+ };
37+
3338/*
3439 * Any behaviour which results in changes to the vma->vm_flags needs to
3540 * take mmap_sem for writing. Others, which simply traverse vmas, need
@@ -312,15 +317,22 @@ static long madvise_willneed(struct vm_area_struct *vma,
312317 return 0 ;
313318}
314319
315- static int madvise_cold_pte_range (pmd_t * pmd , unsigned long addr ,
316- unsigned long end , struct mm_walk * walk )
320+ static int madvise_cold_or_pageout_pte_range (pmd_t * pmd ,
321+ unsigned long addr , unsigned long end ,
322+ struct mm_walk * walk )
317323{
318- struct mmu_gather * tlb = walk -> private ;
324+ struct madvise_walk_private * private = walk -> private ;
325+ struct mmu_gather * tlb = private -> tlb ;
326+ bool pageout = private -> pageout ;
319327 struct mm_struct * mm = tlb -> mm ;
320328 struct vm_area_struct * vma = walk -> vma ;
321329 pte_t * orig_pte , * pte , ptent ;
322330 spinlock_t * ptl ;
323- struct page * page ;
331+ struct page * page = NULL ;
332+ LIST_HEAD (page_list );
333+
334+ if (fatal_signal_pending (current ))
335+ return - EINTR ;
324336
325337#ifdef CONFIG_TRANSPARENT_HUGEPAGE
326338 if (pmd_trans_huge (* pmd )) {
@@ -368,10 +380,17 @@ static int madvise_cold_pte_range(pmd_t *pmd, unsigned long addr,
368380 tlb_remove_pmd_tlb_entry (tlb , pmd , addr );
369381 }
370382
383+ ClearPageReferenced (page );
371384 test_and_clear_page_young (page );
372- deactivate_page (page );
385+ if (pageout ) {
386+ if (!isolate_lru_page (page ))
387+ list_add (& page -> lru , & page_list );
388+ } else
389+ deactivate_page (page );
373390huge_unlock :
374391 spin_unlock (ptl );
392+ if (pageout )
393+ reclaim_pages (& page_list );
375394 return 0 ;
376395 }
377396
@@ -439,27 +458,39 @@ static int madvise_cold_pte_range(pmd_t *pmd, unsigned long addr,
439458 * As a side effect, it confuses idle-page tracking
440459 * because it will miss the recently referenced history.
441460 */
461+ ClearPageReferenced (page );
442462 test_and_clear_page_young (page );
443- deactivate_page (page );
463+ if (pageout ) {
464+ if (!isolate_lru_page (page ))
465+ list_add (& page -> lru , & page_list );
466+ } else
467+ deactivate_page (page );
444468 }
445469
446470 arch_leave_lazy_mmu_mode ();
447471 pte_unmap_unlock (orig_pte , ptl );
472+ if (pageout )
473+ reclaim_pages (& page_list );
448474 cond_resched ();
449475
450476 return 0 ;
451477}
452478
/*
 * Walk operations passed to walk_page_range() by both
 * madvise_cold_page_range() and madvise_pageout_page_range(); the pmd_entry
 * callback reads the pageout flag from walk->private to tell them apart.
 */
453479static const struct mm_walk_ops cold_walk_ops = {
454- .pmd_entry = madvise_cold_pte_range ,
480+ .pmd_entry = madvise_cold_or_pageout_pte_range ,
455481};
456482
/*
 * Walk [addr, end) in vma->vm_mm with cold_walk_ops in non-pageout mode.
 *
 * @tlb:  gather state forwarded to the walk callback via walk_private
 * @vma:  VMA whose mm is walked; also brackets the walk with
 *        tlb_start_vma()/tlb_end_vma()
 * @addr: start of the range
 * @end:  end of the range
 *
 * The return value of walk_page_range() is not checked here.
 */
457483static void madvise_cold_page_range (struct mmu_gather * tlb ,
458484 struct vm_area_struct * vma ,
459485 unsigned long addr , unsigned long end )
460486{
487+ struct madvise_walk_private walk_private = {
488+ .pageout = false, /* callback takes its deactivate_page() branch */
489+ .tlb = tlb ,
490+ };
491+
461492 tlb_start_vma (tlb , vma );
462- walk_page_range (vma -> vm_mm , addr , end , & cold_walk_ops , NULL );
493+ walk_page_range (vma -> vm_mm , addr , end , & cold_walk_ops , & walk_private ); /* callback dereferences walk->private, so a real context is passed */
463494 tlb_end_vma (tlb , vma );
464495}
465496
@@ -482,151 +513,17 @@ static long madvise_cold(struct vm_area_struct *vma,
482513 return 0 ;
483514}
484515
485-
/*
 * Removed side of this diff (every line carries the '-' marker): the former
 * pageout-only page-table walk callback.  Its signature matches the
 * .pmd_entry callback used elsewhere in this file.
 *
 * For the pages mapped in [addr, end) it clears the young bit of the mapping,
 * clears the page's referenced/young state, queues every page for which
 * isolate_lru_page() returns 0 on a local list and hands that list to
 * reclaim_pages() once the page-table lock has been dropped.
 *
 * Returns -EINTR when a fatal signal is pending for current, otherwise 0.
 */
486- static int madvise_pageout_pte_range (pmd_t * pmd , unsigned long addr ,
487- unsigned long end , struct mm_walk * walk )
488- {
489- struct mmu_gather * tlb = walk -> private ;
490- struct mm_struct * mm = tlb -> mm ;
491- struct vm_area_struct * vma = walk -> vma ;
492- pte_t * orig_pte , * pte , ptent ;
493- spinlock_t * ptl ;
494- LIST_HEAD (page_list );
495- struct page * page ;
496-
497- if (fatal_signal_pending (current ))
498- return - EINTR ;
499-
500- #ifdef CONFIG_TRANSPARENT_HUGEPAGE
501- if (pmd_trans_huge (* pmd )) {
502- pmd_t orig_pmd ;
503- unsigned long next = pmd_addr_end (addr , end );
504-
505- tlb_change_page_size (tlb , HPAGE_PMD_SIZE );
506- ptl = pmd_trans_huge_lock (pmd , vma );
507- if (!ptl )
508- return 0 ;
509-
510- orig_pmd = * pmd ;
511- if (is_huge_zero_pmd (orig_pmd ))
512- goto huge_unlock ;
513-
514- if (unlikely (!pmd_present (orig_pmd ))) {
515- VM_BUG_ON (thp_migration_supported () &&
516- !is_pmd_migration_entry (orig_pmd ));
517- goto huge_unlock ;
518- }
519-
520- page = pmd_page (orig_pmd );
521- if (next - addr != HPAGE_PMD_SIZE ) { /* range does not span a full HPAGE_PMD_SIZE: try to split */
522- int err ;
523-
524- if (page_mapcount (page ) != 1 ) /* leave the huge page alone unless its mapcount is exactly 1 */
525- goto huge_unlock ;
526- get_page (page );
527- spin_unlock (ptl );
528- lock_page (page );
529- err = split_huge_page (page );
530- unlock_page (page );
531- put_page (page );
532- if (!err ) /* split succeeded: continue with the per-PTE path below */
533- goto regular_page ;
534- return 0 ;
535- }
536-
537- if (pmd_young (orig_pmd )) {
538- pmdp_invalidate (vma , addr , pmd );
539- orig_pmd = pmd_mkold (orig_pmd );
540-
541- set_pmd_at (mm , addr , pmd , orig_pmd );
542- tlb_remove_tlb_entry (tlb , pmd , addr );
543- }
544-
545- ClearPageReferenced (page );
546- test_and_clear_page_young (page );
547-
548- if (!isolate_lru_page (page )) /* queue the page only when isolate_lru_page() returns 0 */
549- list_add (& page -> lru , & page_list );
550- huge_unlock :
551- spin_unlock (ptl );
552- reclaim_pages (& page_list ); /* called after ptl is released */
553- return 0 ;
554- }
555-
556- if (pmd_trans_unstable (pmd ))
557- return 0 ;
558- regular_page :
559- #endif
560- tlb_change_page_size (tlb , PAGE_SIZE );
561- orig_pte = pte = pte_offset_map_lock (vma -> vm_mm , pmd , addr , & ptl );
562- flush_tlb_batched_pending (mm );
563- arch_enter_lazy_mmu_mode ();
564- for (; addr < end ; pte ++ , addr += PAGE_SIZE ) {
565- ptent = * pte ;
566- if (!pte_present (ptent ))
567- continue ;
568-
569- page = vm_normal_page (vma , addr , ptent );
570- if (!page )
571- continue ;
572-
573- /*
574- * creating a THP page is expensive so split it only if we
575- * are sure it's worth. Split it if we are only owner.
576- */
577- if (PageTransCompound (page )) {
578- if (page_mapcount (page ) != 1 )
579- break ;
580- get_page (page );
581- if (!trylock_page (page )) {
582- put_page (page );
583- break ;
584- }
585- pte_unmap_unlock (orig_pte , ptl ); /* PTE lock is dropped before split_huge_page() is called */
586- if (split_huge_page (page )) {
587- unlock_page (page );
588- put_page (page );
589- pte_offset_map_lock (mm , pmd , addr , & ptl ); /* retake the lock so the unlock after the loop is balanced */
590- break ;
591- }
592- unlock_page (page );
593- put_page (page );
594- pte = pte_offset_map_lock (mm , pmd , addr , & ptl );
595- pte -- ; /* step back one entry: the loop increment brings us back to this addr */
596- addr -= PAGE_SIZE ;
597- continue ;
598- }
599-
600- VM_BUG_ON_PAGE (PageTransCompound (page ), page );
601-
602- if (pte_young (ptent )) {
603- ptent = ptep_get_and_clear_full (mm , addr , pte ,
604- tlb -> fullmm );
605- ptent = pte_mkold (ptent );
606- set_pte_at (mm , addr , pte , ptent );
607- tlb_remove_tlb_entry (tlb , pte , addr );
608- }
609- ClearPageReferenced (page );
610- test_and_clear_page_young (page );
611-
612- if (!isolate_lru_page (page )) /* queue the page only when isolate_lru_page() returns 0 */
613- list_add (& page -> lru , & page_list );
614- }
615-
616- arch_leave_lazy_mmu_mode ();
617- pte_unmap_unlock (orig_pte , ptl );
618- reclaim_pages (& page_list ); /* called after the PTE lock is released */
619- cond_resched ();
620-
621- return 0 ;
622- }
623-
/*
 * Walk [addr, end) in vma->vm_mm with cold_walk_ops in pageout mode.
 *
 * @tlb:  gather state forwarded to the walk callback via walk_private
 * @vma:  VMA whose mm is walked; also brackets the walk with
 *        tlb_start_vma()/tlb_end_vma()
 * @addr: start of the range
 * @end:  end of the range
 *
 * The return value of walk_page_range() is not checked here.
 */
624516static void madvise_pageout_page_range (struct mmu_gather * tlb ,
625517 struct vm_area_struct * vma ,
626518 unsigned long addr , unsigned long end )
627519{
520+ struct madvise_walk_private walk_private = {
521+ .pageout = true, /* callback isolates pages and calls reclaim_pages() */
522+ .tlb = tlb ,
523+ };
524+
628525 tlb_start_vma (tlb , vma );
629- walk_page_range (vma -> vm_mm , addr , end , & cold_walk_ops , NULL );
526+ walk_page_range (vma -> vm_mm , addr , end , & cold_walk_ops , & walk_private ); /* callback dereferences walk->private, so a real context is passed */
630527 tlb_end_vma (tlb , vma );
631528}
632529
0 commit comments