struct mempolicy;
struct anon_vma;
+struct anon_vma_chain;
struct file_ra_state;
struct user_struct;
struct writeback_control;
list_add_tail(&vma->shared.nonlinear, list);
}
+void anon_vma_interval_tree_insert(struct anon_vma_chain *node,
+ struct rb_root *root);
+void anon_vma_interval_tree_remove(struct anon_vma_chain *node,
+ struct rb_root *root);
+struct anon_vma_chain *anon_vma_interval_tree_iter_first(
+ struct rb_root *root, unsigned long start, unsigned long last);
+struct anon_vma_chain *anon_vma_interval_tree_iter_next(
+ struct anon_vma_chain *node, unsigned long start, unsigned long last);
+
+#define anon_vma_interval_tree_foreach(avc, root, start, last) \
+ for (avc = anon_vma_interval_tree_iter_first(root, start, last); \
+ avc; avc = anon_vma_interval_tree_iter_next(avc, start, last))
+
/* mmap.c */
extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin);
extern int vma_adjust(struct vm_area_struct *vma, unsigned long start,
atomic_t refcount;
/*
- * NOTE: the LSB of the head.next is set by
+ * NOTE: the LSB of the rb_root.rb_node is set by
* mm_take_all_locks() _after_ taking the above lock. So the
- * head must only be read/written after taking the above lock
+ * rb_root must only be read/written after taking the above lock
* to be sure to see a valid next pointer. The LSB bit itself
* is serialized by a system wide lock only visible to
* mm_take_all_locks() (mm_all_locks_mutex).
*/
- struct list_head head; /* Chain of private "related" vmas */
+ struct rb_root rb_root; /* Interval tree of private "related" vmas */
};
/*
* with a VMA, or the VMAs associated with an anon_vma.
* The "same_vma" list contains the anon_vma_chains linking
* all the anon_vmas associated with this VMA.
- * The "same_anon_vma" list contains the anon_vma_chains
+ * The "rb" field indexes on an interval tree the anon_vma_chains
* which link all the VMAs associated with this anon_vma.
*/
struct anon_vma_chain {
struct vm_area_struct *vma;
struct anon_vma *anon_vma;
struct list_head same_vma; /* locked by mmap_sem & page_table_lock */
- struct list_head same_anon_vma; /* locked by anon_vma->mutex */
+ struct rb_node rb; /* locked by anon_vma->mutex */
+ unsigned long rb_subtree_last;
};
#ifdef CONFIG_MMU
struct anon_vma *anon_vma)
{
int mapcount, mapcount2;
+ pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
struct anon_vma_chain *avc;
BUG_ON(!PageHead(page));
BUG_ON(PageTail(page));
mapcount = 0;
- list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+ anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
struct vm_area_struct *vma = avc->vma;
unsigned long addr = vma_address(page, vma);
BUG_ON(is_vma_temporary_stack(vma));
__split_huge_page_refcount(page);
mapcount2 = 0;
- list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+ anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
struct vm_area_struct *vma = avc->vma;
unsigned long addr = vma_address(page, vma);
BUG_ON(is_vma_temporary_stack(vma));
#include <linux/mm.h>
#include <linux/fs.h>
+#include <linux/rmap.h>
#include <linux/interval_tree_generic.h>
static inline unsigned long vma_start_pgoff(struct vm_area_struct *v)
rb_insert_augmented(&node->shared.linear.rb, root,
&vma_interval_tree_augment);
}
+
+static inline unsigned long avc_start_pgoff(struct anon_vma_chain *avc)
+{
+ return vma_start_pgoff(avc->vma);
+}
+
+static inline unsigned long avc_last_pgoff(struct anon_vma_chain *avc)
+{
+ return vma_last_pgoff(avc->vma);
+}
+
+INTERVAL_TREE_DEFINE(struct anon_vma_chain, rb, unsigned long, rb_subtree_last,
+ avc_start_pgoff, avc_last_pgoff,, anon_vma_interval_tree)
struct vm_area_struct *vma;
anon_vma_lock(anon_vma);
- list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
+ anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
+ 0, ULONG_MAX) {
vma = vmac->vma;
if (rmap_item->address < vma->vm_start ||
rmap_item->address >= vma->vm_end)
struct vm_area_struct *vma;
anon_vma_lock(anon_vma);
- list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
+ anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
+ 0, ULONG_MAX) {
vma = vmac->vma;
if (rmap_item->address < vma->vm_start ||
rmap_item->address >= vma->vm_end)
struct vm_area_struct *vma;
anon_vma_lock(anon_vma);
- list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
+ anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
+ 0, ULONG_MAX) {
vma = vmac->vma;
if (rmap_item->address < vma->vm_start ||
rmap_item->address >= vma->vm_end)
struct vm_area_struct *vma;
struct task_struct *tsk;
struct anon_vma *av;
+ pgoff_t pgoff;
av = page_lock_anon_vma(page);
if (av == NULL) /* Not actually mapped anymore */
return;
+ pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
read_lock(&tasklist_lock);
for_each_process (tsk) {
struct anon_vma_chain *vmac;
if (!task_early_kill(tsk))
continue;
- list_for_each_entry(vmac, &av->head, same_anon_vma) {
+ anon_vma_interval_tree_foreach(vmac, &av->rb_root,
+ pgoff, pgoff) {
vma = vmac->vma;
if (!page_mapped_in_vma(page, vma))
continue;
#define validate_mm(mm) do { } while (0)
#endif
+/*
+ * vma has some anon_vma assigned, and is already inserted on that
+ * anon_vma's interval trees.
+ *
+ * Before updating the vma's vm_start / vm_end / vm_pgoff fields, the
+ * vma must be removed from the anon_vma's interval trees using
+ * anon_vma_interval_tree_pre_update_vma().
+ *
+ * After the update, the vma will be reinserted using
+ * anon_vma_interval_tree_post_update_vma().
+ *
+ * The entire update must be protected by exclusive mmap_sem and by
+ * the root anon_vma's mutex.
+ */
+static inline void
+anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma)
+{
+ struct anon_vma_chain *avc;
+
+ list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+ anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root);
+}
+
+static inline void
+anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
+{
+ struct anon_vma_chain *avc;
+
+ list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+ anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
+}
+
static int find_vma_links(struct mm_struct *mm, unsigned long addr,
unsigned long end, struct vm_area_struct **pprev,
struct rb_node ***rb_link, struct rb_node **rb_parent)
vma_adjust_trans_huge(vma, start, end, adjust_next);
- /*
- * When changing only vma->vm_end, we don't really need anon_vma
- * lock. This is a fairly rare case by itself, but the anon_vma
- * lock may be shared between many sibling processes. Skipping
- * the lock for brk adjustments makes a difference sometimes.
- */
- if (vma->anon_vma && (importer || start != vma->vm_start)) {
- anon_vma = vma->anon_vma;
+ anon_vma = vma->anon_vma;
+ if (!anon_vma && adjust_next)
+ anon_vma = next->anon_vma;
+ if (anon_vma) {
VM_BUG_ON(adjust_next && next->anon_vma &&
anon_vma != next->anon_vma);
- } else if (adjust_next && next->anon_vma)
- anon_vma = next->anon_vma;
- if (anon_vma)
anon_vma_lock(anon_vma);
+ anon_vma_interval_tree_pre_update_vma(vma);
+ if (adjust_next)
+ anon_vma_interval_tree_pre_update_vma(next);
+ }
if (root) {
flush_dcache_mmap_lock(mapping);
__insert_vm_struct(mm, insert);
}
- if (anon_vma)
+ if (anon_vma) {
+ anon_vma_interval_tree_post_update_vma(vma);
+ if (adjust_next)
+ anon_vma_interval_tree_post_update_vma(next);
anon_vma_unlock(anon_vma);
+ }
if (mapping)
mutex_unlock(&mapping->i_mmap_mutex);
if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
error = acct_stack_growth(vma, size, grow);
if (!error) {
+ anon_vma_interval_tree_pre_update_vma(vma);
vma->vm_end = address;
+ anon_vma_interval_tree_post_update_vma(vma);
perf_event_mmap(vma);
}
}
if (grow <= vma->vm_pgoff) {
error = acct_stack_growth(vma, size, grow);
if (!error) {
+ anon_vma_interval_tree_pre_update_vma(vma);
vma->vm_start = address;
vma->vm_pgoff -= grow;
+ anon_vma_interval_tree_post_update_vma(vma);
perf_event_mmap(vma);
}
}
static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
{
- if (!test_bit(0, (unsigned long *) &anon_vma->root->head.next)) {
+ if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) {
/*
* The LSB of head.next can't change from under us
* because we hold the mm_all_locks_mutex.
* anon_vma->root->mutex.
*/
if (__test_and_set_bit(0, (unsigned long *)
- &anon_vma->root->head.next))
+ &anon_vma->root->rb_root.rb_node))
BUG();
}
}
* A single task can't take more than one mm_take_all_locks() in a row
* or it would deadlock.
*
- * The LSB in anon_vma->head.next and the AS_MM_ALL_LOCKS bitflag in
+ * The LSB in anon_vma->rb_root.rb_node and the AS_MM_ALL_LOCKS bitflag in
* mapping->flags avoid to take the same lock twice, if more than one
* vma in this mm is backed by the same anon_vma or address_space.
*
static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
{
- if (test_bit(0, (unsigned long *) &anon_vma->root->head.next)) {
+ if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) {
/*
* The LSB of head.next can't change to 0 from under
* us because we hold the mm_all_locks_mutex.
*
* We must however clear the bitflag before unlocking
- * the vma so the users using the anon_vma->head will
+ * the vma so the users using the anon_vma->rb_root will
* never see our bitflag.
*
* No need of atomic instructions here, head.next
* anon_vma->root->mutex.
*/
if (!__test_and_clear_bit(0, (unsigned long *)
- &anon_vma->root->head.next))
+ &anon_vma->root->rb_root.rb_node))
BUG();
anon_vma_unlock(anon_vma);
}
avc->vma = vma;
avc->anon_vma = anon_vma;
list_add(&avc->same_vma, &vma->anon_vma_chain);
-
- /*
- * It's critical to add new vmas to the tail of the anon_vma,
- * see comment in huge_memory.c:__split_huge_page().
- */
- list_add_tail(&avc->same_anon_vma, &anon_vma->head);
+ anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
}
/**
struct anon_vma *anon_vma = avc->anon_vma;
root = lock_anon_vma_root(root, anon_vma);
- list_del(&avc->same_anon_vma);
+ anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);
/*
* Leave empty anon_vmas on the list - we'll need
* to free them outside the lock.
*/
- if (list_empty(&anon_vma->head))
+ if (RB_EMPTY_ROOT(&anon_vma->rb_root))
continue;
list_del(&avc->same_vma);
mutex_init(&anon_vma->mutex);
atomic_set(&anon_vma->refcount, 0);
- INIT_LIST_HEAD(&anon_vma->head);
+ anon_vma->rb_root = RB_ROOT;
}
void __init anon_vma_init(void)
{
unsigned int mapcount;
struct anon_vma *anon_vma;
+ pgoff_t pgoff;
struct anon_vma_chain *avc;
int referenced = 0;
return referenced;
mapcount = page_mapcount(page);
- list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+ pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+ anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
struct vm_area_struct *vma = avc->vma;
unsigned long address = vma_address(page, vma);
if (address == -EFAULT)
static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
{
struct anon_vma *anon_vma;
+ pgoff_t pgoff;
struct anon_vma_chain *avc;
int ret = SWAP_AGAIN;
if (!anon_vma)
return ret;
- list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+ pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+ anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
struct vm_area_struct *vma = avc->vma;
unsigned long address;
struct vm_area_struct *, unsigned long, void *), void *arg)
{
struct anon_vma *anon_vma;
+ pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
struct anon_vma_chain *avc;
int ret = SWAP_AGAIN;
if (!anon_vma)
return ret;
anon_vma_lock(anon_vma);
- list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+ anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
struct vm_area_struct *vma = avc->vma;
unsigned long address = vma_address(page, vma);
if (address == -EFAULT)