X-Git-Url: https://git.karo-electronics.de/?a=blobdiff_plain;f=mm%2Frmap.c;h=41ac39749ef42af4b7b6bb173372fb081f39735a;hb=a09c83847b664dcd67a72613374061c900afb799;hp=a9136d8b7577fc47cc5aeb82e2997cd8ded06885;hpb=43f82216f0bd114599f4a221ae6924f3658a0c9a;p=mv-sheeva.git

diff --git a/mm/rmap.c b/mm/rmap.c
index a9136d8b757..41ac39749ef 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -21,27 +21,21 @@
  * Lock ordering in mm:
  *
  * inode->i_mutex	(while writing or truncating, not reading or faulting)
- *   inode->i_alloc_sem
- *
- * When a page fault occurs in writing from user to file, down_read
- * of mmap_sem nests within i_mutex; in sys_msync, i_mutex nests within
- * down_read of mmap_sem; i_mutex and down_write of mmap_sem are never
- * taken together; in truncation, i_mutex is taken outermost.
- *
- * mm->mmap_sem
- *   page->flags PG_locked (lock_page)
- *     mapping->i_mmap_lock
- *       anon_vma->lock
- *         mm->page_table_lock or pte_lock
- *           zone->lru_lock (in mark_page_accessed, isolate_lru_page)
- *           swap_lock (in swap_duplicate, swap_info_get)
- *             mmlist_lock (in mmput, drain_mmlist and others)
- *             mapping->private_lock (in __set_page_dirty_buffers)
- *             inode_lock (in set_page_dirty's __mark_inode_dirty)
- *               sb_lock (within inode_lock in fs/fs-writeback.c)
- *               mapping->tree_lock (widely used, in set_page_dirty,
- *                         in arch-dependent flush_dcache_mmap_lock,
- *                         within inode_lock in __sync_single_inode)
+ *   inode->i_alloc_sem (vmtruncate_range)
+ *   mm->mmap_sem
+ *     page->flags PG_locked (lock_page)
+ *       mapping->i_mmap_lock
+ *         anon_vma->lock
+ *           mm->page_table_lock or pte_lock
+ *             zone->lru_lock (in mark_page_accessed, isolate_lru_page)
+ *             swap_lock (in swap_duplicate, swap_info_get)
+ *               mmlist_lock (in mmput, drain_mmlist and others)
+ *               mapping->private_lock (in __set_page_dirty_buffers)
+ *               inode_lock (in set_page_dirty's __mark_inode_dirty)
+ *                 sb_lock (within inode_lock in fs/fs-writeback.c)
+ *                 mapping->tree_lock (widely used, in set_page_dirty,
+ *                           in arch-dependent flush_dcache_mmap_lock,
+ *                           within inode_lock in __sync_single_inode)
  */
 
 #include <linux/mm.h>
@@ -53,29 +47,12 @@
 #include <linux/rmap.h>
 #include <linux/rcupdate.h>
 #include <linux/module.h>
+#include <linux/kallsyms.h>
 
 #include <asm/tlbflush.h>
 
 struct kmem_cache *anon_vma_cachep;
 
-static inline void validate_anon_vma(struct vm_area_struct *find_vma)
-{
-#ifdef CONFIG_DEBUG_VM
-	struct anon_vma *anon_vma = find_vma->anon_vma;
-	struct vm_area_struct *vma;
-	unsigned int mapcount = 0;
-	int found = 0;
-
-	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
-		mapcount++;
-		BUG_ON(mapcount > 100000);
-		if (vma == find_vma)
-			found = 1;
-	}
-	BUG_ON(!found);
-#endif
-}
-
 /* This must be called under the mmap_sem. */
 int anon_vma_prepare(struct vm_area_struct *vma)
 {
@@ -126,10 +103,8 @@ void __anon_vma_link(struct vm_area_struct *vma)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
 
-	if (anon_vma) {
+	if (anon_vma)
 		list_add_tail(&vma->anon_vma_node, &anon_vma->head);
-		validate_anon_vma(vma);
-	}
 }
 
 void anon_vma_link(struct vm_area_struct *vma)
@@ -139,7 +114,6 @@ void anon_vma_link(struct vm_area_struct *vma)
 	if (anon_vma) {
 		spin_lock(&anon_vma->lock);
 		list_add_tail(&vma->anon_vma_node, &anon_vma->head);
-		validate_anon_vma(vma);
 		spin_unlock(&anon_vma->lock);
 	}
 }
@@ -153,7 +127,6 @@ void anon_vma_unlink(struct vm_area_struct *vma)
 		return;
 
 	spin_lock(&anon_vma->lock);
-	validate_anon_vma(vma);
 	list_del(&vma->anon_vma_node);
 
 	/* We must garbage collect the anon_vma if it's empty */
@@ -167,19 +140,16 @@ void anon_vma_unlink(struct vm_area_struct *vma)
 static void anon_vma_ctor(void *data, struct kmem_cache *cachep,
 			  unsigned long flags)
 {
-	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
-						SLAB_CTOR_CONSTRUCTOR) {
-		struct anon_vma *anon_vma = data;
+	struct anon_vma *anon_vma = data;
 
-		spin_lock_init(&anon_vma->lock);
-		INIT_LIST_HEAD(&anon_vma->head);
-	}
+	spin_lock_init(&anon_vma->lock);
+	INIT_LIST_HEAD(&anon_vma->head);
 }
 
 void __init anon_vma_init(void)
 {
 	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
-			0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor, NULL);
+			0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
 }
 
 /*
@@ -188,7 +158,7 @@ void __init anon_vma_init(void)
  */
 static struct anon_vma *page_lock_anon_vma(struct page *page)
 {
-	struct anon_vma *anon_vma = NULL;
+	struct anon_vma *anon_vma;
 	unsigned long anon_mapping;
 
 	rcu_read_lock();
@@ -200,9 +170,16 @@ static struct anon_vma *page_lock_anon_vma(struct page *page)
 
 	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
 	spin_lock(&anon_vma->lock);
+	return anon_vma;
 out:
 	rcu_read_unlock();
-	return anon_vma;
+	return NULL;
+}
+
+static void page_unlock_anon_vma(struct anon_vma *anon_vma)
+{
+	spin_unlock(&anon_vma->lock);
+	rcu_read_unlock();
 }
 
 /*
@@ -338,7 +315,8 @@ static int page_referenced_anon(struct page *page)
 		if (!mapcount)
 			break;
 	}
-	spin_unlock(&anon_vma->lock);
+
+	page_unlock_anon_vma(anon_vma);
 	return referenced;
 }
 
@@ -438,7 +416,7 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
-	pte_t *pte, entry;
+	pte_t *pte;
 	spinlock_t *ptl;
 	int ret = 0;
 
@@ -450,17 +428,18 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
 	if (!pte)
 		goto out;
 
-	if (!pte_dirty(*pte) && !pte_write(*pte))
-		goto unlock;
+	if (pte_dirty(*pte) || pte_write(*pte)) {
+		pte_t entry;
 
-	entry = ptep_get_and_clear(mm, address, pte);
-	entry = pte_mkclean(entry);
-	entry = pte_wrprotect(entry);
-	ptep_establish(vma, address, pte, entry);
-	lazy_mmu_prot_update(entry);
-	ret = 1;
+		flush_cache_page(vma, address, pte_pfn(*pte));
+		entry = ptep_clear_flush(vma, address, pte);
+		entry = pte_wrprotect(entry);
+		entry = pte_mkclean(entry);
+		set_pte_at(mm, address, pte, entry);
+		lazy_mmu_prot_update(entry);
+		ret = 1;
+	}
 
-unlock:
 	pte_unmap_unlock(pte, ptl);
 out:
 	return ret;
@@ -494,10 +473,15 @@ int page_mkclean(struct page *page)
 		struct address_space *mapping = page_mapping(page);
 		if (mapping)
 			ret = page_mkclean_file(mapping, page);
+		if (page_test_dirty(page)) {
+			page_clear_dirty(page);
+			ret = 1;
+		}
 	}
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(page_mkclean);
 
 /**
  * page_set_anon_rmap - setup new anonymous rmap
@@ -523,20 +507,52 @@ static void __page_set_anon_rmap(struct page *page,
 	__inc_zone_page_state(page, NR_ANON_PAGES);
 }
 
+/**
+ * __page_check_anon_rmap - sanity check anonymous rmap addition
+ * @page:	the page to add the mapping to
+ * @vma:	the vm area in which the mapping is added
+ * @address:	the user virtual address mapped
+ */
+static void __page_check_anon_rmap(struct page *page,
+	struct vm_area_struct *vma, unsigned long address)
+{
+#ifdef CONFIG_DEBUG_VM
+	/*
+	 * The page's anon-rmap details (mapping and index) are guaranteed to
+	 * be set up correctly at this point.
+	 *
+	 * We have exclusion against page_add_anon_rmap because the caller
+	 * always holds the page locked, except if called from page_dup_rmap,
+	 * in which case the page is already known to be set up.
+	 *
+	 * We have exclusion against page_add_new_anon_rmap because those pages
+	 * are initially only visible via the pagetables, and the pte is locked
+	 * over the call to page_add_new_anon_rmap.
+	 */
+	struct anon_vma *anon_vma = vma->anon_vma;
+	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
+	BUG_ON(page->mapping != (struct address_space *)anon_vma);
+	BUG_ON(page->index != linear_page_index(vma, address));
+#endif
+}
+
 /**
  * page_add_anon_rmap - add pte mapping to an anonymous page
  * @page:	the page to add the mapping to
  * @vma:	the vm area in which the mapping is added
  * @address:	the user virtual address mapped
  *
- * The caller needs to hold the pte lock.
+ * The caller needs to hold the pte lock and the page must be locked.
  */
 void page_add_anon_rmap(struct page *page,
 	struct vm_area_struct *vma, unsigned long address)
 {
+	VM_BUG_ON(!PageLocked(page));
+	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
 	if (atomic_inc_and_test(&page->_mapcount))
 		__page_set_anon_rmap(page, vma, address);
-	/* else checking page index and mapping is racy */
+	else
+		__page_check_anon_rmap(page, vma, address);
 }
 
 /*
@@ -547,10 +563,12 @@ void page_add_anon_rmap(struct page *page,
  *
  * Same as page_add_anon_rmap but must only be called on *new* pages.
  * This means the inc-and-test can be bypassed.
+ * Page does not have to be locked.
  */
 void page_add_new_anon_rmap(struct page *page,
 	struct vm_area_struct *vma, unsigned long address)
 {
+	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
 	atomic_set(&page->_mapcount, 0); /* elevate count by 1 (starts at -1) */
 	__page_set_anon_rmap(page, vma, address);
 }
@@ -567,20 +585,48 @@ void page_add_file_rmap(struct page *page)
 		__inc_zone_page_state(page, NR_FILE_MAPPED);
 }
 
+#ifdef CONFIG_DEBUG_VM
+/**
+ * page_dup_rmap - duplicate pte mapping to a page
+ * @page:	the page to add the mapping to
+ *
+ * For copy_page_range only: minimal extract from page_add_file_rmap /
+ * page_add_anon_rmap, avoiding unnecessary tests (already checked) so it's
+ * quicker.
+ *
+ * The caller needs to hold the pte lock.
+ */
+void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address)
+{
+	BUG_ON(page_mapcount(page) == 0);
+	if (PageAnon(page))
+		__page_check_anon_rmap(page, vma, address);
+	atomic_inc(&page->_mapcount);
+}
+#endif
+
 /**
  * page_remove_rmap - take down pte mapping from a page
  * @page: page to remove mapping from
  *
  * The caller needs to hold the pte lock.
  */
-void page_remove_rmap(struct page *page)
+void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
 {
 	if (atomic_add_negative(-1, &page->_mapcount)) {
 		if (unlikely(page_mapcount(page) < 0)) {
(%d)\n", page_mapcount(page)); + printk (KERN_EMERG " page pfn = %lx\n", page_to_pfn(page)); printk (KERN_EMERG " page->flags = %lx\n", page->flags); printk (KERN_EMERG " page->count = %x\n", page_count(page)); printk (KERN_EMERG " page->mapping = %p\n", page->mapping); + print_symbol (KERN_EMERG " vma->vm_ops = %s\n", (unsigned long)vma->vm_ops); + if (vma->vm_ops) { + print_symbol (KERN_EMERG " vma->vm_ops->nopage = %s\n", (unsigned long)vma->vm_ops->nopage); + print_symbol (KERN_EMERG " vma->vm_ops->fault = %s\n", (unsigned long)vma->vm_ops->fault); + } + if (vma->vm_file && vma->vm_file->f_op) + print_symbol (KERN_EMERG " vma->vm_file->f_op->mmap = %s\n", (unsigned long)vma->vm_file->f_op->mmap); BUG(); } @@ -593,8 +639,10 @@ void page_remove_rmap(struct page *page) * Leaving it set also helps swapoff to reinstate ptes * faster for those pages still in swapcache. */ - if (page_test_and_clear_dirty(page)) + if (page_test_dirty(page)) { + page_clear_dirty(page); set_page_dirty(page); + } __dec_zone_page_state(page, PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED); } @@ -685,7 +733,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, dec_mm_counter(mm, file_rss); - page_remove_rmap(page); + page_remove_rmap(page, vma); page_cache_release(page); out_unmap: @@ -775,7 +823,7 @@ static void try_to_unmap_cluster(unsigned long cursor, if (pte_dirty(pteval)) set_page_dirty(page); - page_remove_rmap(page); + page_remove_rmap(page, vma); page_cache_release(page); dec_mm_counter(mm, file_rss); (*mapcount)--; @@ -798,7 +846,8 @@ static int try_to_unmap_anon(struct page *page, int migration) if (ret == SWAP_FAIL || !page_mapped(page)) break; } - spin_unlock(&anon_vma->lock); + + page_unlock_anon_vma(anon_vma); return ret; }