protection(struct mmu_gather *tlb, struct vm_area_struct *vm
 	hugetlb_vma_unlock_write(vma);
 	mmu_notifier_invalidate_range_end(&range);
+	tlb_end_vma(tlb, vma);
+
 	return pages > 0 ? (pages << h->order) : pages;
 }
 
@@ -7259,6 +7266,9 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
 	} else {
 		i_mmap_assert_write_locked(vma->vm_file->f_mapping);
 	}
+
+	tlb_change_page_size(&tlb, sz);
+	tlb_start_vma(&tlb, vma);
 	for (address = start; address < end; address += PUD_SIZE) {
 		ptep = hugetlb_walk(vma, address, sz);
 		if (!ptep)
diff --git a/mm/rmap.c b/mm/rmap.c
index d6799afe11147..27210bc6fb489 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -2015,6 +2015,8 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 			goto walk_abort;
 		tlb_gather_mmu(&tlb, mm);
+		tlb_change_page_size(&tlb, huge_page_size(hstate_vma(vma)));
+		tlb_start_vma(&tlb, vma);
 		if (huge_pmd_unshare(&tlb, vma, address, pvmw.pte)) {
 			hugetlb_vma_unlock_write(vma);
 			huge_pmd_unshare_flush(&tlb, vma);
@@ -2413,6 +2415,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
 		}
 		tlb_gather_mmu(&tlb, mm);
+		tlb_change_page_size(&tlb, huge_page_size(hstate_vma(vma)));
+		tlb_start_vma(&tlb, vma);
 		if (huge_pmd_unshare(&tlb, vma, address, pvmw.pte)) {
 			hugetlb_vma_unlock_write(vma);
 			huge_pmd_unshare_flush(&tlb, vma);
-- 
2.52.0

But now I'm staring at it and wonder whether we should just defer the
TLB flushing changes to a later point and only focus on the IPI
flushes. Doing only that with mmu_gather looks *really* weird, and I
don't want to introduce some other mechanism just for that batching
purpose.

Hm ...

-- 
Cheers

David

[PATCH v2 4/4] mm/hugetlb: fix excessive IPI broadcasts when unsharing PMD tables using mmu_gather
  "David Hildenbrand (Red Hat)"
  Harry Yoo
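
For reference, here is a minimal sketch of the mmu_gather lifecycle that
the hunks above follow. It is not the patch itself: sketch_unshare_pmds()
and the empty loop body are hypothetical stand-ins for the real
hugetlb_unshare_pmds() walk; only the tlb_* calls mirror the diff.

#include <linux/hugetlb.h>
#include <asm/tlb.h>

/* Hypothetical stand-in illustrating the batching pattern used above. */
static void sketch_unshare_pmds(struct vm_area_struct *vma,
				unsigned long start, unsigned long end)
{
	unsigned long sz = huge_page_size(hstate_vma(vma));
	struct mmu_gather tlb;
	unsigned long address;

	tlb_gather_mmu(&tlb, vma->vm_mm);	/* open the batch for this mm */
	tlb_change_page_size(&tlb, sz);		/* record the hugetlb page size
						 * before anything is queued */
	tlb_start_vma(&tlb, vma);		/* per-VMA flush bookkeeping */

	for (address = start; address < end; address += PUD_SIZE) {
		/*
		 * The unshare step queues its flush in @tlb instead of
		 * broadcasting an IPI for each PMD table it unshares.
		 */
	}

	tlb_end_vma(&tlb, vma);		/* flush what was queued for the VMA */
	tlb_finish_mmu(&tlb);		/* one deferred teardown at the end */
}

The batching is the point of the fix named in the subject: the deferred
flush in tlb_end_vma()/tlb_finish_mmu() goes out once per unshare
operation, rather than one IPI broadcast per unshared PMD table.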