#include <linux/hash.h>
#include <linux/freezer.h>
#include <linux/oom.h>
+#include <linux/numa.h>
#include <asm/tlbflush.h>
#include "internal.h"
unsigned long address; /* + low bits used for flags below */
unsigned int oldchecksum; /* when unstable */
union {
- struct rb_node node; /* when node of unstable tree */
+ struct {
+ struct rb_node node; /* when node of unstable tree */
+ struct rb_root *root; /* root of unstable tree holding this node */
+ };
struct { /* when listed from stable tree */
struct stable_node *head;
struct hlist_node hlist;
#define STABLE_FLAG 0x200 /* is listed from the stable tree */
/* The stable and unstable tree heads */
-static struct rb_root root_stable_tree = RB_ROOT;
-static struct rb_root root_unstable_tree = RB_ROOT;
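+/*
+ * Only the first element needs an explicit RB_ROOT; the remaining
+ * elements are zero-initialized, which is equivalent to RB_ROOT.
+ */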
+static struct rb_root root_unstable_tree[MAX_NUMNODES] = { RB_ROOT, };
+static struct rb_root root_stable_tree[MAX_NUMNODES] = { RB_ROOT, };
#define MM_SLOTS_HASH_SHIFT 10
#define MM_SLOTS_HASH_HEADS (1 << MM_SLOTS_HASH_SHIFT)
/* Milliseconds ksmd should sleep between batches */
static unsigned int ksm_thread_sleep_millisecs = 20;
+/* Zeroed when merging across nodes is not allowed */
+static unsigned int ksm_merge_across_nodes = 1;
+
#define KSM_RUN_STOP 0
#define KSM_RUN_MERGE 1
#define KSM_RUN_UNMERGE 2
return page;
}
+/*
+ * This helper returns the right index into the array of tree roots.
+ * When the merge_across_nodes knob is set to 1, there is a single pair
+ * of rb-trees for stable and unstable pages from all nodes, with roots
+ * at index 0. Otherwise, every node has its own pair of trees.
+ */
+static inline int get_kpfn_nid(unsigned long kpfn)
+{
+ if (ksm_merge_across_nodes)
+ return 0;
+ else
+ return pfn_to_nid(kpfn);
+}
+
static void remove_node_from_stable_tree(struct stable_node *stable_node)
{
struct rmap_item *rmap_item;
struct hlist_node *hlist;
+ int nid;
hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
if (rmap_item->hlist.next)
cond_resched();
}
- rb_erase(&stable_node->node, &root_stable_tree);
+ nid = get_kpfn_nid(stable_node->kpfn);
+ rb_erase(&stable_node->node, &root_stable_tree[nid]);
free_stable_node(stable_node);
}
age = (unsigned char)(ksm_scan.seqnr - rmap_item->address);
BUG_ON(age > 1);
if (!age)
- rb_erase(&rmap_item->node, &root_unstable_tree);
+ rb_erase(&rmap_item->node, rmap_item->root);
ksm_pages_unshared--;
rmap_item->address &= PAGE_MASK;
*/
static struct page *stable_tree_search(struct page *page)
{
- struct rb_node *node = root_stable_tree.rb_node;
+ struct rb_node *node;
struct stable_node *stable_node;
+ int nid;
stable_node = page_stable_node(page);
if (stable_node) { /* ksm page forked */
return page;
}
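+ /* With merging across nodes disabled, each node has its own stable tree */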
+ nid = get_kpfn_nid(page_to_pfn(page));
+ node = root_stable_tree[nid].rb_node;
+
while (node) {
struct page *tree_page;
int ret;
*/
static struct stable_node *stable_tree_insert(struct page *kpage)
{
- struct rb_node **new = &root_stable_tree.rb_node;
+ int nid;
+ struct rb_node **new = NULL;
struct rb_node *parent = NULL;
struct stable_node *stable_node;
+ nid = get_kpfn_nid(page_to_pfn(kpage));
+ new = &root_stable_tree[nid].rb_node;
+
while (*new) {
struct page *tree_page;
int ret;
return NULL;
rb_link_node(&stable_node->node, parent, new);
- rb_insert_color(&stable_node->node, &root_stable_tree);
+ rb_insert_color(&stable_node->node, &root_stable_tree[nid]);
INIT_HLIST_HEAD(&stable_node->hlist);
struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item,
struct page *page,
struct page **tree_pagep)
-
{
- struct rb_node **new = &root_unstable_tree.rb_node;
+ struct rb_node **new = NULL;
+ struct rb_root *root;
struct rb_node *parent = NULL;
+ int nid;
+
+ nid = get_kpfn_nid(page_to_pfn(page));
+ root = &root_unstable_tree[nid];
+
+ new = &root->rb_node;
while (*new) {
struct rmap_item *tree_rmap_item;
rmap_item->address |= UNSTABLE_FLAG;
rmap_item->address |= (ksm_scan.seqnr & SEQNR_MASK);
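+ /* Remember which tree this item is in, so removal need not recompute nid */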
+ rmap_item->root = root;
rb_link_node(&rmap_item->node, parent, new);
- rb_insert_color(&rmap_item->node, &root_unstable_tree);
+ rb_insert_color(&rmap_item->node, root);
ksm_pages_unshared++;
return NULL;
struct mm_slot *slot;
struct vm_area_struct *vma;
struct rmap_item *rmap_item;
+ int i;
if (list_empty(&ksm_mm_head.mm_list))
return NULL;
*/
lru_add_drain_all();
- root_unstable_tree = RB_ROOT;
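+ /* The unstable trees are flushed and rebuilt on each full scan */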
+ for (i = 0; i < MAX_NUMNODES; i++)
+ root_unstable_tree[i] = RB_ROOT;
spin_lock(&ksm_mmlist_lock);
slot = list_entry(slot->mm_list.next, struct mm_slot, mm_list);
stable_node = page_stable_node(newpage);
if (stable_node) {
VM_BUG_ON(stable_node->kpfn != page_to_pfn(oldpage));
- stable_node->kpfn = page_to_pfn(newpage);
+
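+ /*
+ * If merging across nodes is disabled and migration changed the
+ * page's node, the stable_node would now be in the wrong per-node
+ * tree, so remove it instead of updating its kpfn.
+ */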
+ if (ksm_merge_across_nodes ||
+ page_to_nid(oldpage) == page_to_nid(newpage))
+ stable_node->kpfn = page_to_pfn(newpage);
+ else
+ remove_node_from_stable_tree(stable_node);
}
}
#endif /* CONFIG_MIGRATION */
unsigned long end_pfn)
{
struct rb_node *node;
+ int i;
- for (node = rb_first(&root_stable_tree); node; node = rb_next(node)) {
- struct stable_node *stable_node;
- stable_node = rb_entry(node, struct stable_node, node);
- if (stable_node->kpfn >= start_pfn &&
- stable_node->kpfn < end_pfn)
- return stable_node;
- }
+ for (i = 0; i < MAX_NUMNODES; i++)
+ for (node = rb_first(&root_stable_tree[i]); node;
+ node = rb_next(node)) {
+ struct stable_node *stable_node;
+
+ stable_node = rb_entry(node, struct stable_node, node);
+ if (stable_node->kpfn >= start_pfn &&
+ stable_node->kpfn < end_pfn)
+ return stable_node;
+ }
return NULL;
}
}
KSM_ATTR(run);
+#ifdef CONFIG_NUMA
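+/*
+ * sysfs knob /sys/kernel/mm/ksm/merge_across_nodes:
+ * 1 (default) merges pages from all nodes in a single pair of trees;
+ * 0 keeps a pair of trees per node, so merged pages stay on their node.
+ */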
+static ssize_t merge_across_nodes_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", ksm_merge_across_nodes);
+}
+
+static ssize_t merge_across_nodes_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ int err;
+ unsigned long knob;
+
+ err = kstrtoul(buf, 10, &knob);
+ if (err)
+ return err;
+ if (knob > 1)
+ return -EINVAL;
+
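+ /*
+ * The knob can only be switched while ksmd is stopped and no pages
+ * are currently shared; otherwise existing stable nodes would be
+ * left in the wrong trees.
+ */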
+ mutex_lock(&ksm_thread_mutex);
+ if (ksm_run & KSM_RUN_MERGE) {
+ err = -EBUSY;
+ } else {
+ if (ksm_merge_across_nodes != knob) {
+ if (ksm_pages_shared > 0)
+ err = -EBUSY;
+ else
+ ksm_merge_across_nodes = knob;
+ }
+ }
+
+ mutex_unlock(&ksm_thread_mutex);
+
+ return err ? err : count;
+}
+KSM_ATTR(merge_across_nodes);
+#endif
+
static ssize_t pages_shared_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
&pages_unshared_attr.attr,
&pages_volatile_attr.attr,
&full_scans_attr.attr,
+#ifdef CONFIG_NUMA
+ &merge_across_nodes_attr.attr,
+#endif
NULL,
};