From: Hugh Dickins Date: Tue, 23 Jan 2007 15:46:22 +0000 (+0100) Subject: read_zero_pagealigned() locking fix X-Git-Tag: v2.6.16.39-rc1~17 X-Git-Url: https://git.karo-electronics.de/?a=commitdiff_plain;h=faa309e7b921b2104a42d4ac0e0122f3399a3789;p=karo-tx-linux.git read_zero_pagealigned() locking fix Ramiro Voicu hits the BUG_ON(!pte_none(*pte)) in zeromap_pte_range: kernel bugzilla 7645. Right: read_zero_pagealigned uses down_read of mmap_sem, but another thread's racing read of /dev/zero, or a normal fault, can easily set that pte again, in between zap_page_range and zeromap_page_range getting there. It's been wrong ever since 2.4.3. The simple fix is to use down_write instead, but that would serialize reads of /dev/zero more than at present: perhaps some app would be badly affected. So instead let zeromap_page_range return the error instead of BUG_ON, and read_zero_pagealigned break to the slower clear_user loop in that case - there's no need to optimize for it. Use -EEXIST for when a pte is found: BUG_ON in mmap_zero (the other user of zeromap_page_range), though it really isn't interesting there. And since mmap_zero wants -EAGAIN for out-of-memory, the zeromaps better return that than -ENOMEM. Signed-off-by: Hugh Dickins Signed-off-by: Adrian Bunk --- diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 29c41f4418c0..3b50f44a8d13 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -613,7 +613,8 @@ static inline size_t read_zero_pagealigned(char __user * buf, size_t size) count = size; zap_page_range(vma, addr, count, NULL); - zeromap_page_range(vma, addr, count, PAGE_COPY); + if (zeromap_page_range(vma, addr, count, PAGE_COPY)) + break; size -= count; buf += count; @@ -680,11 +681,14 @@ out: static int mmap_zero(struct file * file, struct vm_area_struct * vma) { + int err; + if (vma->vm_flags & VM_SHARED) return shmem_zero_setup(vma); - if (zeromap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, vma->vm_page_prot)) - return -EAGAIN; - return 0; + err = zeromap_page_range(vma, vma->vm_start, + vma->vm_end - vma->vm_start, vma->vm_page_prot); + BUG_ON(err == -EEXIST); + return err; } #else /* CONFIG_MMU */ static ssize_t read_zero(struct file * file, char * buf, diff --git a/mm/memory.c b/mm/memory.c index a4caa2f223e1..97f5ea395720 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1092,21 +1092,27 @@ static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd, { pte_t *pte; spinlock_t *ptl; + int err = 0; pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); if (!pte) - return -ENOMEM; + return -EAGAIN; do { struct page *page = ZERO_PAGE(addr); pte_t zero_pte = pte_wrprotect(mk_pte(page, prot)); + + if (unlikely(!pte_none(*pte))) { + err = -EEXIST; + pte++; + break; + } page_cache_get(page); page_add_file_rmap(page); inc_mm_counter(mm, file_rss); - BUG_ON(!pte_none(*pte)); set_pte_at(mm, addr, pte, zero_pte); } while (pte++, addr += PAGE_SIZE, addr != end); pte_unmap_unlock(pte - 1, ptl); - return 0; + return err; } static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud, @@ -1114,16 +1120,18 @@ static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud, { pmd_t *pmd; unsigned long next; + int err; pmd = pmd_alloc(mm, pud, addr); if (!pmd) - return -ENOMEM; + return -EAGAIN; do { next = pmd_addr_end(addr, end); - if (zeromap_pte_range(mm, pmd, addr, next, prot)) - return -ENOMEM; + err = zeromap_pte_range(mm, pmd, addr, next, prot); + if (err) + break; } while (pmd++, addr = next, addr != end); - return 0; + return err; } static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd, @@ -1131,16 +1139,18 @@ static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd, { pud_t *pud; unsigned long next; + int err; pud = pud_alloc(mm, pgd, addr); if (!pud) - return -ENOMEM; + return -EAGAIN; do { next = pud_addr_end(addr, end); - if (zeromap_pmd_range(mm, pud, addr, next, prot)) - return -ENOMEM; + err = zeromap_pmd_range(mm, pud, addr, next, prot); + if (err) + break; } while (pud++, addr = next, addr != end); - return 0; + return err; } int zeromap_page_range(struct vm_area_struct *vma,