break;
}
policy->policy = mode;
+ policy->cpuset_mems_allowed = cpuset_mems_allowed(current);
return policy;
}
page = vm_normal_page(vma, addr, *pte);
if (!page)
continue;
+ if (PageReserved(page))
+ continue;
nid = page_to_nid(page);
if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
continue;
static inline int vma_migratable(struct vm_area_struct *vma)
{
if (vma->vm_flags & (
- VM_LOCKED|VM_IO|VM_HUGETLB|VM_PFNMAP))
+ VM_LOCKED|VM_IO|VM_HUGETLB|VM_PFNMAP|VM_RESERVED))
return 0;
return 1;
}
if (!nodes)
return 0;
- /* Update current mems_allowed */
- cpuset_update_current_mems_allowed();
- /* Ignore nodes not set in current->mems_allowed */
- cpuset_restrict_to_mems_allowed(nodes->bits);
+ cpuset_update_task_memory_state();
+ if (!cpuset_nodes_subset_current_mems_allowed(*nodes))
+ return -EINVAL;
return mpol_check_policy(mode, nodes);
}
struct vm_area_struct *vma = NULL;
struct mempolicy *pol = current->mempolicy;
- cpuset_update_current_mems_allowed();
+ cpuset_update_task_memory_state();
if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR))
return -EINVAL;
if (flags & MPOL_F_ADDR) {
return do_set_mempolicy(mode, &nodes);
}
-/* Macro needed until Paul implements this function in kernel/cpusets.c */
-#define cpuset_mems_allowed(task) node_online_map
-
asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
const unsigned long __user *old_nodes,
const unsigned long __user *new_nodes)
{
struct mempolicy *pol = get_vma_policy(current, vma, addr);
- cpuset_update_current_mems_allowed();
+ cpuset_update_task_memory_state();
if (unlikely(pol->policy == MPOL_INTERLEAVE)) {
unsigned nid;
* interrupt context and apply the current process NUMA policy.
* Returns NULL when no page can be allocated.
*
- * Don't call cpuset_update_current_mems_allowed() unless
+ * Don't call cpuset_update_task_memory_state() unless
* 1) it's ok to take cpuset_sem (can WAIT), and
* 2) allocating for current task (not interrupt).
*/
struct mempolicy *pol = current->mempolicy;
if ((gfp & __GFP_WAIT) && !in_interrupt())
- cpuset_update_current_mems_allowed();
+ cpuset_update_task_memory_state();
if (!pol || in_interrupt())
pol = &default_policy;
if (pol->policy == MPOL_INTERLEAVE)
}
EXPORT_SYMBOL(alloc_pages_current);
+/*
+ * If mpol_copy() sees current->cpuset == cpuset_being_rebound, then it
+ * rebinds the mempolicy its copying by calling mpol_rebind_policy()
+ * with the mems_allowed returned by cpuset_mems_allowed(). This
+ * keeps mempolicies cpuset relative after its cpuset moves. See
+ * further kernel/cpuset.c update_nodemask().
+ */
+void *cpuset_being_rebound;
+
/* Slow path of a mempolicy copy */
struct mempolicy *__mpol_copy(struct mempolicy *old)
{
if (!new)
return ERR_PTR(-ENOMEM);
+ if (current_cpuset_is_being_rebound()) {
+ nodemask_t mems = cpuset_mems_allowed(current);
+ mpol_rebind_policy(old, &mems);
+ }
*new = *old;
atomic_set(&new->refcnt, 1);
if (new->policy == MPOL_BIND) {
return 0;
}
+void mpol_shared_policy_init(struct shared_policy *info, int policy,
+ nodemask_t *policy_nodes)
+{
+ info->root = RB_ROOT;
+ spin_lock_init(&info->lock);
+
+ if (policy != MPOL_DEFAULT) {
+ struct mempolicy *newpol;
+
+ /* Falls back to MPOL_DEFAULT on any error */
+ newpol = mpol_new(policy, policy_nodes);
+ if (!IS_ERR(newpol)) {
+ /* Create pseudo-vma that contains just the policy */
+ struct vm_area_struct pvma;
+
+ memset(&pvma, 0, sizeof(struct vm_area_struct));
+ /* Policy covers entire file */
+ pvma.vm_end = TASK_SIZE;
+ mpol_set_shared_policy(info, &pvma, newpol);
+ mpol_free(newpol);
+ }
+ }
+}
+
int mpol_set_shared_policy(struct shared_policy *info,
struct vm_area_struct *vma, struct mempolicy *npol)
{
}
/* Migrate a policy to a different set of nodes */
-static void rebind_policy(struct mempolicy *pol, const nodemask_t *old,
- const nodemask_t *new)
+void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask)
{
+ nodemask_t *mpolmask;
nodemask_t tmp;
if (!pol)
return;
+ mpolmask = &pol->cpuset_mems_allowed;
+ if (nodes_equal(*mpolmask, *newmask))
+ return;
switch (pol->policy) {
case MPOL_DEFAULT:
break;
case MPOL_INTERLEAVE:
- nodes_remap(tmp, pol->v.nodes, *old, *new);
+ nodes_remap(tmp, pol->v.nodes, *mpolmask, *newmask);
pol->v.nodes = tmp;
- current->il_next = node_remap(current->il_next, *old, *new);
+ *mpolmask = *newmask;
+ current->il_next = node_remap(current->il_next,
+ *mpolmask, *newmask);
break;
case MPOL_PREFERRED:
pol->v.preferred_node = node_remap(pol->v.preferred_node,
- *old, *new);
+ *mpolmask, *newmask);
+ *mpolmask = *newmask;
break;
case MPOL_BIND: {
nodemask_t nodes;
nodes_clear(nodes);
for (z = pol->v.zonelist->zones; *z; z++)
node_set((*z)->zone_pgdat->node_id, nodes);
- nodes_remap(tmp, nodes, *old, *new);
+ nodes_remap(tmp, nodes, *mpolmask, *newmask);
nodes = tmp;
zonelist = bind_zonelist(&nodes);
kfree(pol->v.zonelist);
pol->v.zonelist = zonelist;
}
+ *mpolmask = *newmask;
break;
}
default:
}
/*
- * Someone moved this task to different nodes. Fixup mempolicies.
+ * Wrapper for mpol_rebind_policy() that just requires task
+ * pointer, and updates task mempolicy.
+ */
+
+void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new)
+{
+ mpol_rebind_policy(tsk->mempolicy, new);
+}
+
+/*
+ * Rebind each vma in mm to new nodemask.
*
- * TODO - fixup current->mm->vma and shmfs/tmpfs/hugetlbfs policies as well,
- * once we have a cpuset mechanism to mark which cpuset subtree is migrating.
+ * Call holding a reference to mm. Takes mm->mmap_sem during call.
*/
-void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new)
+
+void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
{
- rebind_policy(current->mempolicy, old, new);
+ struct vm_area_struct *vma;
+
+ down_write(&mm->mmap_sem);
+ for (vma = mm->mmap; vma; vma = vma->vm_next)
+ mpol_rebind_policy(vma->vm_policy, new);
+ up_write(&mm->mmap_sem);
}
/*