]> git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
vfs: reimplement d_rcu_to_refcount() using lockref_get_or_lock()
authorLinus Torvalds <torvalds@linux-foundation.org>
Mon, 2 Sep 2013 18:38:06 +0000 (11:38 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Mon, 2 Sep 2013 18:38:06 +0000 (11:38 -0700)
This moves __d_rcu_to_refcount() from <linux/dcache.h> into fs/namei.c
and re-implements it using the lockref infrastructure instead.  It also
adds a lot of comments about what is actually going on, because turning
a dentry that was looked up using RCU into a long-lived reference
counted entry is one of the more subtle parts of the rcu walk.

We also used to be _particularly_ subtle in unlazy_walk() where we
re-validate both the dentry and its parent using the same sequence
count.  We used to do it by nesting the locks and then verifying the
sequence count just once.

That was silly, because nested locking is expensive, but the sequence
count check is not.  So this just re-validates the dentry and the parent
separately, avoiding the nested locking, and making the lockref lookup
possible.

Acked-by: Waiman Long <waiman.long@hp.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
fs/dcache.c
fs/namei.c
include/linux/dcache.h

index 2d244227999d151de8f12c423fed5d7faa99d770..96655f4f45749e530cee80a32c36fb8e6061164b 100644 (file)
@@ -1786,7 +1786,7 @@ static noinline enum slow_d_compare slow_dentry_cmp(
  * without taking d_lock and checking d_seq sequence count against @seq
  * returned here.
  *
- * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
+ * A refcount may be taken on the found dentry with the d_rcu_to_refcount
  * function.
  *
  * Alternatively, __d_lookup_rcu may be called again to look up the child of
index 7720fbd5277bb8830a336cb08dc39fd0634c199f..2c30c84d4ea1da5e17381caeea6de5ae27e47a0e 100644 (file)
@@ -494,6 +494,50 @@ static inline void unlock_rcu_walk(void)
        br_read_unlock(&vfsmount_lock);
 }
 
+/*
+ * When we move over from the RCU domain to properly refcounted
+ * long-lived dentries, we need to check the sequence numbers
+ * we got before lookup very carefully.
+ *
+ * We cannot blindly increment a dentry refcount - even if it
+ * is not locked - if it is zero, because it may have gone
+ * through the final d_kill() logic already.
+ *
+ * So for a zero refcount, we need to get the spinlock (which is
+ * safe even for a dead dentry because the de-allocation is
+ * RCU-delayed), and check the sequence count under the lock.
+ *
+ * Once we have checked the sequence count, we know it is live,
+ * and since we hold the spinlock it cannot die from under us.
+ *
+ * In contrast, if the reference count wasn't zero, we can just
+ * increment the lockref without having to take the spinlock.
+ * Even if the sequence number ends up being stale, we haven't
+ * gone through the final dput() and killed the dentry yet.
+ */
+static inline int d_rcu_to_refcount(struct dentry *dentry, seqcount_t *validate, unsigned seq)
+{
+       int gotref;
+
+       gotref = lockref_get_or_lock(&dentry->d_lockref);
+
+       /* Does the sequence number still match? */
+       if (read_seqcount_retry(validate, seq)) {
+               if (gotref)
+                       dput(dentry);
+               else
+                       spin_unlock(&dentry->d_lock);
+               return -ECHILD;
+       }
+
+       /* Get the ref now, if we couldn't get it originally */
+       if (!gotref) {
+               dentry->d_lockref.count++;
+               spin_unlock(&dentry->d_lock);
+       }
+       return 0;
+}
+
 /**
  * unlazy_walk - try to switch to ref-walk mode.
  * @nd: nameidata pathwalk data
@@ -518,29 +562,28 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
                                nd->root.dentry != fs->root.dentry)
                        goto err_root;
        }
-       spin_lock(&parent->d_lock);
+
+       /*
+        * For a negative lookup, the lookup sequence point is the parents
+        * sequence point, and it only needs to revalidate the parent dentry.
+        *
+        * For a positive lookup, we need to move both the parent and the
+        * dentry from the RCU domain to be properly refcounted. And the
+        * sequence number in the dentry validates *both* dentry counters,
+        * since we checked the sequence number of the parent after we got
+        * the child sequence number. So we know the parent must still
+        * be valid if the child sequence number is still valid.
+        */
        if (!dentry) {
-               if (!__d_rcu_to_refcount(parent, nd->seq))
-                       goto err_parent;
+               if (d_rcu_to_refcount(parent, &parent->d_seq, nd->seq) < 0)
+                       goto err_root;
                BUG_ON(nd->inode != parent->d_inode);
        } else {
-               if (dentry->d_parent != parent)
+               if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0)
+                       goto err_root;
+               if (d_rcu_to_refcount(parent, &dentry->d_seq, nd->seq) < 0)
                        goto err_parent;
-               spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
-               if (!__d_rcu_to_refcount(dentry, nd->seq))
-                       goto err_child;
-               /*
-                * If the sequence check on the child dentry passed, then
-                * the child has not been removed from its parent. This
-                * means the parent dentry must be valid and able to take
-                * a reference at this point.
-                */
-               BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
-               BUG_ON(!parent->d_lockref.count);
-               parent->d_lockref.count++;
-               spin_unlock(&dentry->d_lock);
        }
-       spin_unlock(&parent->d_lock);
        if (want_root) {
                path_get(&nd->root);
                spin_unlock(&fs->lock);
@@ -551,10 +594,8 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
        nd->flags &= ~LOOKUP_RCU;
        return 0;
 
-err_child:
-       spin_unlock(&dentry->d_lock);
 err_parent:
-       spin_unlock(&parent->d_lock);
+       dput(dentry);
 err_root:
        if (want_root)
                spin_unlock(&fs->lock);
@@ -585,14 +626,11 @@ static int complete_walk(struct nameidata *nd)
                nd->flags &= ~LOOKUP_RCU;
                if (!(nd->flags & LOOKUP_ROOT))
                        nd->root.mnt = NULL;
-               spin_lock(&dentry->d_lock);
-               if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) {
-                       spin_unlock(&dentry->d_lock);
+
+               if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0) {
                        unlock_rcu_walk();
                        return -ECHILD;
                }
-               BUG_ON(nd->inode != dentry->d_inode);
-               spin_unlock(&dentry->d_lock);
                mntget(nd->path.mnt);
                unlock_rcu_walk();
        }
index efdc94434c300df295ef1f30b2973ae8fc0c7c2c..9169b91ea2d289e34952400b8ca89252222690e7 100644 (file)
@@ -304,28 +304,6 @@ extern struct dentry *__d_lookup(const struct dentry *, const struct qstr *);
 extern struct dentry *__d_lookup_rcu(const struct dentry *parent,
                                const struct qstr *name, unsigned *seq);
 
-/**
- * __d_rcu_to_refcount - take a refcount on dentry if sequence check is ok
- * @dentry: dentry to take a ref on
- * @seq: seqcount to verify against
- * Returns: 0 on failure, else 1.
- *
- * __d_rcu_to_refcount operates on a dentry,seq pair that was returned
- * by __d_lookup_rcu, to get a reference on an rcu-walk dentry.
- */
-static inline int __d_rcu_to_refcount(struct dentry *dentry, unsigned seq)
-{
-       int ret = 0;
-
-       assert_spin_locked(&dentry->d_lock);
-       if (!read_seqcount_retry(&dentry->d_seq, seq)) {
-               ret = 1;
-               dentry->d_lockref.count++;
-       }
-
-       return ret;
-}
-
 static inline unsigned d_count(const struct dentry *dentry)
 {
        return dentry->d_lockref.count;