X-Git-Url: https://git.karo-electronics.de/?a=blobdiff_plain;f=mm%2Fuserfaultfd.c;h=a0817cc470b0067001b5bb08016faa3248251693;hb=26071cedc519b822f69cc42dba9be969d2cdeb19;hp=af817e5060fbfbda2be8ba35024c1ad460055b20;hpb=e61249bb8a67a9f31a52f91259569aa44e9f6ca2;p=karo-tx-linux.git diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index af817e5060fb..a0817cc470b0 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -14,6 +14,9 @@ #include #include #include +#include +#include +#include #include #include "internal.h" @@ -139,6 +142,198 @@ static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address) return pmd; } +#ifdef CONFIG_HUGETLB_PAGE +/* + * __mcopy_atomic processing for HUGETLB vmas. Note that this routine is + * called with mmap_sem held for read; it releases mmap_sem before returning. Returns the number of bytes copied if nonzero, otherwise a negative errno. + */ +static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, + struct vm_area_struct *dst_vma, + unsigned long dst_start, + unsigned long src_start, + unsigned long len, + bool zeropage) +{ + ssize_t err; + pte_t *dst_pte; + unsigned long src_addr, dst_addr; + long copied; + struct page *page; + struct hstate *h; + unsigned long vma_hpagesize; + pgoff_t idx; + u32 hash; + struct address_space *mapping; + + /* + * There is no default zero huge page for all huge page sizes as + * supported by hugetlb. A PMD_SIZE zero huge page may exist, as used + * by THP. Since we cannot reliably insert a zero page, this + * feature is not supported. + */ + if (zeropage) { + up_read(&dst_mm->mmap_sem); + return -EINVAL; + } + + src_addr = src_start; + dst_addr = dst_start; + copied = 0; + page = NULL; + vma_hpagesize = vma_kernel_pagesize(dst_vma); + + /* + * Validate that dst_start and len are aligned to the huge page size + */ + err = -EINVAL; + if (dst_start & (vma_hpagesize - 1) || len & (vma_hpagesize - 1)) + goto out_unlock; + +retry: + /* + * On routine entry dst_vma is set. If we had to drop mmap_sem and + * retry, dst_vma will be set to NULL and we must look it up again.
+ */ + if (!dst_vma) { + err = -EINVAL; + dst_vma = find_vma(dst_mm, dst_start); + if (!dst_vma || !is_vm_hugetlb_page(dst_vma)) + goto out_unlock; + + if (vma_hpagesize != vma_kernel_pagesize(dst_vma)) + goto out_unlock; + + /* + * Make sure the vma is not shared, that the dst + * range is both valid and fully within a single existing vma. + */ + if (dst_vma->vm_flags & VM_SHARED) + goto out_unlock; + if (dst_start < dst_vma->vm_start || + dst_start + len > dst_vma->vm_end) + goto out_unlock; + } + + if (WARN_ON(dst_addr & (vma_hpagesize - 1) || + (len - copied) & (vma_hpagesize - 1))) + goto out_unlock; + + /* + * Only allow __mcopy_atomic_hugetlb on userfaultfd registered ranges. + */ + if (!dst_vma->vm_userfaultfd_ctx.ctx) + goto out_unlock; + + /* + * Ensure the dst_vma has an anon_vma. + */ + err = -ENOMEM; + if (unlikely(anon_vma_prepare(dst_vma))) + goto out_unlock; + + h = hstate_vma(dst_vma); + + while (src_addr < src_start + len) { + pte_t dst_pteval; + + BUG_ON(dst_addr >= dst_start + len); + VM_BUG_ON(dst_addr & ~huge_page_mask(h)); + + /* + * Serialize via the hugetlb_fault_mutex_table slot picked by hugetlb_fault_mutex_hash() + */ + idx = linear_page_index(dst_vma, dst_addr); + mapping = dst_vma->vm_file->f_mapping; + hash = hugetlb_fault_mutex_hash(h, dst_mm, dst_vma, mapping, + idx, dst_addr); + mutex_lock(&hugetlb_fault_mutex_table[hash]); + + err = -ENOMEM; + dst_pte = huge_pte_alloc(dst_mm, dst_addr, huge_page_size(h)); + if (!dst_pte) { + mutex_unlock(&hugetlb_fault_mutex_table[hash]); + goto out_unlock; + } + + err = -EEXIST; + dst_pteval = huge_ptep_get(dst_pte); + if (!huge_pte_none(dst_pteval)) { + mutex_unlock(&hugetlb_fault_mutex_table[hash]); + goto out_unlock; + } + + err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, + dst_addr, src_addr, &page); + + mutex_unlock(&hugetlb_fault_mutex_table[hash]); + + cond_resched(); + + if (unlikely(err == -EFAULT)) { + up_read(&dst_mm->mmap_sem); + BUG_ON(!page); + + err = copy_huge_page_from_user(page, + (const void __user *)src_addr,
+ pages_per_huge_page(h), true); + if (unlikely(err)) { + err = -EFAULT; + goto out; + } + down_read(&dst_mm->mmap_sem); + + dst_vma = NULL; + goto retry; + } else + BUG_ON(page); + + if (!err) { + dst_addr += vma_hpagesize; + src_addr += vma_hpagesize; + copied += vma_hpagesize; + + if (fatal_signal_pending(current)) + err = -EINTR; + } + if (err) + break; + } + +out_unlock: + up_read(&dst_mm->mmap_sem); +out: + if (page) { + /* + * We encountered an error and are about to free a newly + * allocated huge page. It is possible that there was a + * reservation associated with the page that has been + * consumed. See the routine restore_reserve_on_error + * for details. Unfortunately, we cannot call + * restore_reserve_on_error now as it would require holding + * mmap_sem. Clear the PagePrivate flag so that the global + * reserve count will not be incremented in free_huge_page. + * The reservation map will still indicate the reservation + * was consumed and possibly prevent later page allocation. + * This is better than leaking a global reservation. + */ + ClearPagePrivate(page); + put_page(page); + } + BUG_ON(copied < 0); + BUG_ON(err > 0); + BUG_ON(!copied && !err); + return copied ?
+ copied : err; +} +#else /* !CONFIG_HUGETLB_PAGE */ +/* declared but never defined: fail at build time if gcc attempts to use this */ +extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, + struct vm_area_struct *dst_vma, + unsigned long dst_start, + unsigned long src_start, + unsigned long len, + bool zeropage); +#endif /* CONFIG_HUGETLB_PAGE */ + static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long src_start, @@ -175,12 +370,21 @@ retry: */ err = -EINVAL; dst_vma = find_vma(dst_mm, dst_start); - if (!dst_vma || (dst_vma->vm_flags & VM_SHARED)) + if (!dst_vma) + goto out_unlock; + if (!vma_is_shmem(dst_vma) && dst_vma->vm_flags & VM_SHARED) goto out_unlock; if (dst_start < dst_vma->vm_start || dst_start + len > dst_vma->vm_end) goto out_unlock; + /* + * If this is a HUGETLB vma, pass off to the appropriate routine + */ + if (is_vm_hugetlb_page(dst_vma)) + return __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start, + src_start, len, zeropage); + /* * Be strict and only allow __mcopy_atomic on userfaultfd * registered ranges to prevent userland errors going @@ -193,11 +397,7 @@ retry: if (!dst_vma->vm_userfaultfd_ctx.ctx) goto out_unlock; - /* - * FIXME: only allow copying on anonymous vmas, tmpfs should - * be added. - */ - if (dst_vma->vm_ops) + if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma)) goto out_unlock; /* @@ -206,7 +406,7 @@ retry: * dst_vma.
*/ err = -ENOMEM; - if (unlikely(anon_vma_prepare(dst_vma))) + if (vma_is_anonymous(dst_vma) && unlikely(anon_vma_prepare(dst_vma))) goto out_unlock; while (src_addr < src_start + len) { @@ -243,12 +443,21 @@ retry: BUG_ON(pmd_none(*dst_pmd)); BUG_ON(pmd_trans_huge(*dst_pmd)); - if (!zeropage) - err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma, - dst_addr, src_addr, &page); - else - err = mfill_zeropage_pte(dst_mm, dst_pmd, dst_vma, - dst_addr); + if (vma_is_anonymous(dst_vma)) { + if (!zeropage) + err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma, + dst_addr, src_addr, + &page); + else + err = mfill_zeropage_pte(dst_mm, dst_pmd, + dst_vma, dst_addr); + } else { + err = -EINVAL; /* zeropage is not handled for non-anonymous vmas: err stays -EINVAL */ + if (likely(!zeropage)) + err = shmem_mcopy_atomic_pte(dst_mm, dst_pmd, + dst_vma, dst_addr, + src_addr, &page); + } cond_resched();