]> git.karo-electronics.de Git - linux-beck.git/commitdiff
Merge branch 'for-3.15-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj...
authorLinus Torvalds <torvalds@linux-foundation.org>
Tue, 13 May 2014 02:22:57 +0000 (11:22 +0900)
committerLinus Torvalds <torvalds@linux-foundation.org>
Tue, 13 May 2014 02:22:57 +0000 (11:22 +0900)
Pull cgroup fixes from Tejun Heo:
 "During recent restructuring, device_cgroup unified config input check
  and enforcement logic; unfortunately, it turned out to share too much.
  Aristeu's patches fix the breakage and marked for -stable backport.

  The other two patches are fallouts from kernfs conversion.  The blkcg
  change is temporary and will go away once kernfs internal locking gets
  simplified (patches pending)"

* 'for-3.15-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  blkcg: use trylock on blkcg_pol_mutex in blkcg_reset_stats()
  device_cgroup: check if exception removal is allowed
  device_cgroup: fix the comment format for recently added functions
  device_cgroup: rework device access check and exception checking
  cgroup: fix the retry path of cgroup_mount()

block/blk-cgroup.c
kernel/cgroup.c
security/device_cgroup.c

index e4a4145926f629787ce0647f3036a98298c8f055..1039fb9ff5f5f998628884dedab422c9b405a36c 100644 (file)
@@ -451,7 +451,20 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
        struct blkcg_gq *blkg;
        int i;
 
-       mutex_lock(&blkcg_pol_mutex);
+       /*
+        * XXX: We invoke cgroup_add/rm_cftypes() under blkcg_pol_mutex
+        * which ends up putting cgroup's internal cgroup_tree_mutex under
+        * it; however, cgroup_tree_mutex is nested above cgroup file
+        * active protection and grabbing blkcg_pol_mutex from a cgroup
+        * file operation creates a possible circular dependency.  cgroup
+        * internal locking is planned to go through further simplification
+        * and this issue should go away soon.  For now, let's trylock
+        * blkcg_pol_mutex and restart the write on failure.
+        *
+        * http://lkml.kernel.org/g/5363C04B.4010400@oracle.com
+        */
+       if (!mutex_trylock(&blkcg_pol_mutex))
+               return restart_syscall();
        spin_lock_irq(&blkcg->lock);
 
        /*
index 9fcdaa705b6cb7442b4babcb3a9ab40564afa27b..11a03d67635a79fd18242e85b114fefaea980efe 100644 (file)
@@ -1495,7 +1495,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
         */
        if (!use_task_css_set_links)
                cgroup_enable_task_cg_lists();
-retry:
+
        mutex_lock(&cgroup_tree_mutex);
        mutex_lock(&cgroup_mutex);
 
@@ -1503,7 +1503,7 @@ retry:
        ret = parse_cgroupfs_options(data, &opts);
        if (ret)
                goto out_unlock;
-
+retry:
        /* look for a matching existing root */
        if (!opts.subsys_mask && !opts.none && !opts.name) {
                cgrp_dfl_root_visible = true;
@@ -1562,9 +1562,9 @@ retry:
                if (!atomic_inc_not_zero(&root->cgrp.refcnt)) {
                        mutex_unlock(&cgroup_mutex);
                        mutex_unlock(&cgroup_tree_mutex);
-                       kfree(opts.release_agent);
-                       kfree(opts.name);
                        msleep(10);
+                       mutex_lock(&cgroup_tree_mutex);
+                       mutex_lock(&cgroup_mutex);
                        goto retry;
                }
 
index 8365909f5f8cfc6f404fe5ce21b936884298d13f..9134dbf70d3ee6898664f895905c8452e89a01c3 100644 (file)
@@ -306,57 +306,138 @@ static int devcgroup_seq_show(struct seq_file *m, void *v)
 }
 
 /**
- * may_access - verifies if a new exception is part of what is allowed
- *             by a dev cgroup based on the default policy +
- *             exceptions. This is used to make sure a child cgroup
- *             won't have more privileges than its parent or to
- *             verify if a certain access is allowed.
- * @dev_cgroup: dev cgroup to be tested against
- * @refex: new exception
- * @behavior: behavior of the exception
+ * match_exception     - iterates the exception list trying to find a complete match
+ * @exceptions: list of exceptions
+ * @type: device type (DEV_BLOCK or DEV_CHAR)
+ * @major: device file major number, ~0 to match all
+ * @minor: device file minor number, ~0 to match all
+ * @access: permission mask (ACC_READ, ACC_WRITE, ACC_MKNOD)
+ *
+ * It is considered a complete match if an exception is found that will
+ * contain the entire range of provided parameters.
+ *
+ * Return: true in case it matches an exception completely
  */
-static bool may_access(struct dev_cgroup *dev_cgroup,
-                      struct dev_exception_item *refex,
-                      enum devcg_behavior behavior)
+static bool match_exception(struct list_head *exceptions, short type,
+                           u32 major, u32 minor, short access)
 {
        struct dev_exception_item *ex;
-       bool match = false;
 
-       rcu_lockdep_assert(rcu_read_lock_held() ||
-                          lockdep_is_held(&devcgroup_mutex),
-                          "device_cgroup::may_access() called without proper synchronization");
+       list_for_each_entry_rcu(ex, exceptions, list) {
+               if ((type & DEV_BLOCK) && !(ex->type & DEV_BLOCK))
+                       continue;
+               if ((type & DEV_CHAR) && !(ex->type & DEV_CHAR))
+                       continue;
+               if (ex->major != ~0 && ex->major != major)
+                       continue;
+               if (ex->minor != ~0 && ex->minor != minor)
+                       continue;
+               /* provided access cannot have more than the exception rule */
+               if (access & (~ex->access))
+                       continue;
+               return true;
+       }
+       return false;
+}
+
+/**
+ * match_exception_partial - iterates the exception list trying to find a partial match
+ * @exceptions: list of exceptions
+ * @type: device type (DEV_BLOCK or DEV_CHAR)
+ * @major: device file major number, ~0 to match all
+ * @minor: device file minor number, ~0 to match all
+ * @access: permission mask (ACC_READ, ACC_WRITE, ACC_MKNOD)
+ *
+ * It is considered a partial match if an exception's range is found to
+ * contain *any* of the devices specified by provided parameters. This is
+ * used to make sure no extra access is being granted that is forbidden by
+ * any of the exception list.
+ *
+ * Return: true in case the provided range mat matches an exception completely
+ */
+static bool match_exception_partial(struct list_head *exceptions, short type,
+                                   u32 major, u32 minor, short access)
+{
+       struct dev_exception_item *ex;
 
-       list_for_each_entry_rcu(ex, &dev_cgroup->exceptions, list) {
-               if ((refex->type & DEV_BLOCK) && !(ex->type & DEV_BLOCK))
+       list_for_each_entry_rcu(ex, exceptions, list) {
+               if ((type & DEV_BLOCK) && !(ex->type & DEV_BLOCK))
                        continue;
-               if ((refex->type & DEV_CHAR) && !(ex->type & DEV_CHAR))
+               if ((type & DEV_CHAR) && !(ex->type & DEV_CHAR))
                        continue;
-               if (ex->major != ~0 && ex->major != refex->major)
+               /*
+                * We must be sure that both the exception and the provided
+                * range aren't masking all devices
+                */
+               if (ex->major != ~0 && major != ~0 && ex->major != major)
                        continue;
-               if (ex->minor != ~0 && ex->minor != refex->minor)
+               if (ex->minor != ~0 && minor != ~0 && ex->minor != minor)
                        continue;
-               if (refex->access & (~ex->access))
+               /*
+                * In order to make sure the provided range isn't matching
+                * an exception, all its access bits shouldn't match the
+                * exception's access bits
+                */
+               if (!(access & ex->access))
                        continue;
-               match = true;
-               break;
+               return true;
        }
+       return false;
+}
+
+/**
+ * verify_new_ex - verifies if a new exception is allowed by parent cgroup's permissions
+ * @dev_cgroup: dev cgroup to be tested against
+ * @refex: new exception
+ * @behavior: behavior of the exception's dev_cgroup
+ *
+ * This is used to make sure a child cgroup won't have more privileges
+ * than its parent
+ */
+static bool verify_new_ex(struct dev_cgroup *dev_cgroup,
+                         struct dev_exception_item *refex,
+                         enum devcg_behavior behavior)
+{
+       bool match = false;
+
+       rcu_lockdep_assert(rcu_read_lock_held() ||
+                          lockdep_is_held(&devcgroup_mutex),
+                          "device_cgroup:verify_new_ex called without proper synchronization");
 
        if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) {
                if (behavior == DEVCG_DEFAULT_ALLOW) {
-                       /* the exception will deny access to certain devices */
+                       /*
+                        * new exception in the child doesn't matter, only
+                        * adding extra restrictions
+                        */ 
                        return true;
                } else {
-                       /* the exception will allow access to certain devices */
+                       /*
+                        * new exception in the child will add more devices
+                        * that can be acessed, so it can't match any of
+                        * parent's exceptions, even slightly
+                        */ 
+                       match = match_exception_partial(&dev_cgroup->exceptions,
+                                                       refex->type,
+                                                       refex->major,
+                                                       refex->minor,
+                                                       refex->access);
+
                        if (match)
-                               /*
-                                * a new exception allowing access shouldn't
-                                * match an parent's exception
-                                */
                                return false;
                        return true;
                }
        } else {
-               /* only behavior == DEVCG_DEFAULT_DENY allowed here */
+               /*
+                * Only behavior == DEVCG_DEFAULT_DENY allowed here, therefore
+                * the new exception will add access to more devices and must
+                * be contained completely in an parent's exception to be
+                * allowed
+                */
+               match = match_exception(&dev_cgroup->exceptions, refex->type,
+                                       refex->major, refex->minor,
+                                       refex->access);
+
                if (match)
                        /* parent has an exception that matches the proposed */
                        return true;
@@ -378,7 +459,38 @@ static int parent_has_perm(struct dev_cgroup *childcg,
 
        if (!parent)
                return 1;
-       return may_access(parent, ex, childcg->behavior);
+       return verify_new_ex(parent, ex, childcg->behavior);
+}
+
+/**
+ * parent_allows_removal - verify if it's ok to remove an exception
+ * @childcg: child cgroup from where the exception will be removed
+ * @ex: exception being removed
+ *
+ * When removing an exception in cgroups with default ALLOW policy, it must
+ * be checked if removing it will give the child cgroup more access than the
+ * parent.
+ *
+ * Return: true if it's ok to remove exception, false otherwise
+ */
+static bool parent_allows_removal(struct dev_cgroup *childcg,
+                                 struct dev_exception_item *ex)
+{
+       struct dev_cgroup *parent = css_to_devcgroup(css_parent(&childcg->css));
+
+       if (!parent)
+               return true;
+
+       /* It's always allowed to remove access to devices */
+       if (childcg->behavior == DEVCG_DEFAULT_DENY)
+               return true;
+
+       /*
+        * Make sure you're not removing part or a whole exception existing in
+        * the parent cgroup
+        */
+       return !match_exception_partial(&parent->exceptions, ex->type,
+                                       ex->major, ex->minor, ex->access);
 }
 
 /**
@@ -616,17 +728,21 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
 
        switch (filetype) {
        case DEVCG_ALLOW:
-               if (!parent_has_perm(devcgroup, &ex))
-                       return -EPERM;
                /*
                 * If the default policy is to allow by default, try to remove
                 * an matching exception instead. And be silent about it: we
                 * don't want to break compatibility
                 */
                if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
+                       /* Check if the parent allows removing it first */
+                       if (!parent_allows_removal(devcgroup, &ex))
+                               return -EPERM;
                        dev_exception_rm(devcgroup, &ex);
-                       return 0;
+                       break;
                }
+
+               if (!parent_has_perm(devcgroup, &ex))
+                       return -EPERM;
                rc = dev_exception_add(devcgroup, &ex);
                break;
        case DEVCG_DENY:
@@ -704,18 +820,18 @@ static int __devcgroup_check_permission(short type, u32 major, u32 minor,
                                        short access)
 {
        struct dev_cgroup *dev_cgroup;
-       struct dev_exception_item ex;
-       int rc;
-
-       memset(&ex, 0, sizeof(ex));
-       ex.type = type;
-       ex.major = major;
-       ex.minor = minor;
-       ex.access = access;
+       bool rc;
 
        rcu_read_lock();
        dev_cgroup = task_devcgroup(current);
-       rc = may_access(dev_cgroup, &ex, dev_cgroup->behavior);
+       if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW)
+               /* Can't match any of the exceptions, even partially */
+               rc = !match_exception_partial(&dev_cgroup->exceptions,
+                                             type, major, minor, access);
+       else
+               /* Need to match completely one exception to be allowed */
+               rc = match_exception(&dev_cgroup->exceptions, type, major,
+                                    minor, access);
        rcu_read_unlock();
 
        if (!rc)