The root device cgroup starts with rwm to 'all'. A child device
cgroup gets a copy of the parent. Administrators can then remove
devices from the whitelist or add new entries. A child cgroup can
-never receive a device access which is denied by its parent. However
-when a device access is removed from a parent it will not also be
-removed from the child(ren).
+never receive a device access which is denied by its parent.
2. User Interface
A cgroup may not be granted more permissions than the cgroup's
parent has.
+
+4. Hierarchy
+
+device cgroups maintain hierarchy by making sure a cgroup never has more
+access permissions than its parent. Every time an entry is written to
+a cgroup's devices.deny file, all its children will have that entry removed
+from their whitelist and all the locally set whitelist entries will be
+re-evaluated. In case one of the locally set whitelist entries would provide
+more access than the cgroup's parent, it'll be removed from the whitelist.
+
+Example:
+ A
+ / \
+ B
+
+ group behavior exceptions
+ A allow "b 8:* rwm", "c 116:1 rw"
+ B deny "c 1:3 rwm", "c 116:2 rwm", "b 3:* rwm"
+
+If a device is denied in group A:
+ # echo "c 116:* r" > A/devices.deny
+it'll propagate down and after revalidating B's entries, the whitelist entry
+"c 116:2 rwm" will be removed:
+
+ group whitelist entries denied devices
+ A all "b 8:* rwm", "c 116:* rw"
+ B "c 1:3 rwm", "b 3:* rwm" all the rest
+
+In case parent's exceptions change and local exceptions are not allowed
+anymore, they'll be deleted.
+
+Notice that new whitelist entries will not be propagated:
+ A
+ / \
+ B
+
+ group whitelist entries denied devices
+ A "c 1:3 rwm", "c 1:5 r" all the rest
+ B "c 1:3 rwm", "c 1:5 r" all the rest
+
+when adding "c *:3 rwm":
+ # echo "c *:3 rwm" >A/devices.allow
+
+the result:
+ group whitelist entries denied devices
+ A "c *:3 rwm", "c 1:5 r" all the rest
+ B "c 1:3 rwm", "c 1:5 r" all the rest
+
+but now it'll be possible to add new entries to B:
+ # echo "c 2:3 rwm" >B/devices.allow
+ # echo "c 50:3 r" >B/devices.allow
+or even
+ # echo "c *:3 rwm" >B/devices.allow
+
+Allowing or denying all by writing 'a' to devices.allow or devices.deny will
+not be possible once the device cgroups has children.
+
+4.1 Hierarchy (internal implementation)
+
+device cgroups is implemented internally using a behavior (ALLOW, DENY) and a
+list of exceptions. The internal state is controlled using the same user
+interface to preserve compatibility with the previous whitelist-only
+implementation. Removal or addition of exceptions that will reduce the access
+to devices will be propagated down the hierarchy.
+For every propagated exception, the effective rules will be re-evaluated based
+on current parent's access rules.
struct cgroup_subsys_state css;
struct list_head exceptions;
enum devcg_behavior behavior;
+ /* temporary list for pending propagation operations */
+ struct list_head propagate_pending;
};
static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s)
__dev_exception_clean(dev_cgroup);
}
+static inline bool is_devcg_online(const struct dev_cgroup *devcg)
+{
+ return (devcg->behavior != DEVCG_DEFAULT_NONE);
+}
+
/**
* devcgroup_online - initializes devcgroup's behavior and exceptions based on
* parent's
if (!dev_cgroup)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&dev_cgroup->exceptions);
+ INIT_LIST_HEAD(&dev_cgroup->propagate_pending);
dev_cgroup->behavior = DEVCG_DEFAULT_NONE;
parent_cgroup = cgroup->parent;
return parent->behavior == DEVCG_DEFAULT_ALLOW;
}
+/**
+ * revalidate_active_exceptions - walks through the active exception list and
+ * revalidates the exceptions based on parent's
+ * behavior and exceptions. The exceptions that
+ * are no longer valid will be removed.
+ * Called with devcgroup_mutex held.
+ * @devcg: cgroup which exceptions will be checked
+ *
+ * This is one of the three key functions for hierarchy implementation.
+ * This function is responsible for re-evaluating all the cgroup's active
+ * exceptions due to a parent's exception change.
+ * Refer to Documentation/cgroups/devices.txt for more details.
+ */
+static void revalidate_active_exceptions(struct dev_cgroup *devcg)
+{
+ struct dev_exception_item *ex;
+ struct list_head *this, *tmp;
+
+ list_for_each_safe(this, tmp, &devcg->exceptions) {
+ ex = container_of(this, struct dev_exception_item, list);
+ if (!parent_has_perm(devcg, ex))
+ dev_exception_rm(devcg, ex);
+ }
+}
+
+/**
+ * get_online_devcg - walks the cgroup tree and fills a list with the online
+ * groups
+ * @root: cgroup used as starting point
+ * @online: list that will be filled with online groups
+ *
+ * Must be called with devcgroup_mutex held. Grabs RCU lock.
+ * Because devcgroup_mutex is held, no devcg will become online or offline
+ * during the tree walk (see devcgroup_online, devcgroup_offline)
+ * A separated list is needed because propagate_behavior() and
+ * propagate_exception() need to allocate memory and can block.
+ */
+static void get_online_devcg(struct cgroup *root, struct list_head *online)
+{
+ struct cgroup *pos;
+ struct dev_cgroup *devcg;
+
+ lockdep_assert_held(&devcgroup_mutex);
+
+ rcu_read_lock();
+ cgroup_for_each_descendant_pre(pos, root) {
+ devcg = cgroup_to_devcgroup(pos);
+ if (is_devcg_online(devcg))
+ list_add_tail(&devcg->propagate_pending, online);
+ }
+ rcu_read_unlock();
+}
+
+/**
+ * propagate_exception - propagates a new exception to the children
+ * @devcg_root: device cgroup that added a new exception
+ * @ex: new exception to be propagated
+ *
+ * returns: 0 in case of success, != 0 in case of error
+ */
+static int propagate_exception(struct dev_cgroup *devcg_root,
+ struct dev_exception_item *ex)
+{
+ struct cgroup *root = devcg_root->css.cgroup;
+ struct dev_cgroup *devcg, *parent, *tmp;
+ int rc = 0;
+ LIST_HEAD(pending);
+
+ get_online_devcg(root, &pending);
+
+ list_for_each_entry_safe(devcg, tmp, &pending, propagate_pending) {
+ parent = cgroup_to_devcgroup(devcg->css.cgroup->parent);
+
+ /*
+ * in case both root's behavior and devcg is allow, a new
+ * restriction means adding to the exception list
+ */
+ if (devcg_root->behavior == DEVCG_DEFAULT_ALLOW &&
+ devcg->behavior == DEVCG_DEFAULT_ALLOW) {
+ rc = dev_exception_add(devcg, ex);
+ if (rc)
+ break;
+ } else {
+ /*
+ * in the other possible cases:
+ * root's behavior: allow, devcg's: deny
+ * root's behavior: deny, devcg's: deny
+ * the exception will be removed
+ */
+ dev_exception_rm(devcg, ex);
+ }
+ revalidate_active_exceptions(devcg);
+
+ list_del_init(&devcg->propagate_pending);
+ }
+ return rc;
+}
+
+static inline bool has_children(struct dev_cgroup *devcgroup)
+{
+ struct cgroup *cgrp = devcgroup->css.cgroup;
+
+ return !list_empty(&cgrp->children);
+}
+
/*
* Modify the exception list using allow/deny rules.
* CAP_SYS_ADMIN is needed for this. It's at least separate from CAP_MKNOD
case 'a':
switch (filetype) {
case DEVCG_ALLOW:
+ if (has_children(devcgroup))
+ return -EINVAL;
+
if (!may_allow_all(parent))
return -EPERM;
dev_exception_clean(devcgroup);
return rc;
break;
case DEVCG_DENY:
+ if (has_children(devcgroup))
+ return -EINVAL;
+
dev_exception_clean(devcgroup);
devcgroup->behavior = DEVCG_DEFAULT_DENY;
break;
dev_exception_rm(devcgroup, &ex);
return 0;
}
- return dev_exception_add(devcgroup, &ex);
+ rc = dev_exception_add(devcgroup, &ex);
+ break;
case DEVCG_DENY:
/*
* If the default policy is to deny by default, try to remove
* an matching exception instead. And be silent about it: we
* don't want to break compatibility
*/
- if (devcgroup->behavior == DEVCG_DEFAULT_DENY) {
+ if (devcgroup->behavior == DEVCG_DEFAULT_DENY)
dev_exception_rm(devcgroup, &ex);
- return 0;
- }
- return dev_exception_add(devcgroup, &ex);
+ else
+ rc = dev_exception_add(devcgroup, &ex);
+
+ if (rc)
+ break;
+ /* we only propagate new restrictions */
+ rc = propagate_exception(devcgroup, &ex);
+ break;
default:
- return -EINVAL;
+ rc = -EINVAL;
}
- return 0;
+ return rc;
}
static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft,