Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/ide-2.6

[mv-sheeva.git] / fs / nfsd / vfs.c
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c

index f30cc4eadb0a8406ac23d3fdd100fec6ba44b355..4145083dcf8817ed883652939cac1faa4dfd7cac 100644 (file)
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -55,6 +55,7 @@
  #include <linux/security.h>
  #endif /* CONFIG_NFSD_V4 */
  #include <linux/jhash.h>
+#include <linux/ima.h>
  
  #include <asm/uaccess.h>
  
@@ -100,36 +101,35 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
  {
         struct svc_export *exp = *expp, *exp2 = NULL;
         struct dentry *dentry = *dpp;
-       struct vfsmount *mnt = mntget(exp->ex_path.mnt);
-       struct dentry *mounts = dget(dentry);
+       struct path path = {.mnt = mntget(exp->ex_path.mnt),
+                           .dentry = dget(dentry)};
         int err = 0;
  
-       while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts));
+       while (d_mountpoint(path.dentry) && follow_down(&path))
+               ;
  
-       exp2 = rqst_exp_get_by_name(rqstp, mnt, mounts);
+       exp2 = rqst_exp_get_by_name(rqstp, &path);
         if (IS_ERR(exp2)) {
                 if (PTR_ERR(exp2) != -ENOENT)
                         err = PTR_ERR(exp2);
-               dput(mounts);
-               mntput(mnt);
+               path_put(&path);
                 goto out;
         }
         if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
                 /* successfully crossed mount point */
                 /*
-                * This is subtle: dentry is *not* under mnt at this point.
-                * The only reason we are safe is that original mnt is pinned
-                * down by exp, so we should dput before putting exp.
+                * This is subtle: path.dentry is *not* on path.mnt
+                * at this point.  The only reason we are safe is that
+                * original mnt is pinned down by exp, so we should
+                * put path *before* putting exp
                  */
-               dput(dentry);
-               *dpp = mounts;
-               exp_put(exp);
+               *dpp = path.dentry;
+               path.dentry = dentry;
                 *expp = exp2;
-       } else {
-               exp_put(exp2);
-               dput(mounts);
+               exp2 = exp;
         }
-       mntput(mnt);
+       path_put(&path);
+       exp_put(exp2);
  out:
         return err;
  }
@@ -168,28 +168,29 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
                         /* checking mountpoint crossing is very different when stepping up */
                         struct svc_export *exp2 = NULL;
                         struct dentry *dp;
-                       struct vfsmount *mnt = mntget(exp->ex_path.mnt);
-                       dentry = dget(dparent);
-                       while(dentry == mnt->mnt_root && follow_up(&mnt, &dentry))
+                       struct path path = {.mnt = mntget(exp->ex_path.mnt),
+                                           .dentry = dget(dparent)};
+
+                       while (path.dentry == path.mnt->mnt_root &&
+                              follow_up(&path))
                                 ;
-                       dp = dget_parent(dentry);
-                       dput(dentry);
-                       dentry = dp;
+                       dp = dget_parent(path.dentry);
+                       dput(path.dentry);
+                       path.dentry = dp;
  
-                       exp2 = rqst_exp_parent(rqstp, mnt, dentry);
+                       exp2 = rqst_exp_parent(rqstp, &path);
                         if (PTR_ERR(exp2) == -ENOENT) {
-                               dput(dentry);
                                 dentry = dget(dparent);
                         } else if (IS_ERR(exp2)) {
                                 host_err = PTR_ERR(exp2);
-                               dput(dentry);
-                               mntput(mnt);
+                               path_put(&path);
                                 goto out_nfserr;
                         } else {
+                               dentry = dget(path.dentry);
                                 exp_put(exp);
                                 exp = exp2;
                         }
-                       mntput(mnt);
+                       path_put(&path);
                 }
         } else {
                 fh_lock(fhp);
@@ -735,6 +736,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
                             flags, cred);
         if (IS_ERR(*filp))
                 host_err = PTR_ERR(*filp);
+       else
+               ima_counts_get(*filp);
  out_nfserr:
         err = nfserrno(host_err);
  out:
@@ -963,6 +966,43 @@ static void kill_suid(struct dentry *dentry)
         mutex_unlock(&dentry->d_inode->i_mutex);
  }
  
+/*
+ * Gathered writes: If another process is currently writing to the file,
+ * there's a high chance this is another nfsd (triggered by a bulk write
+ * from a client's biod). Rather than syncing the file with each write
+ * request, we sleep for 10 msec before checking whether a sync is needed.
+ *
+ * I don't know if this roughly approximates C. Juszczak's idea of
+ * gathered writes, but it's a nice and simple solution (IMHO), and it
+ * seems to work:-)
+ *
+ * Note: we do this only in the NFSv2 case, since v3 and higher have a
+ * better tool (separate unstable writes and commits) for solving this
+ * problem.  Returns 0, or the result of nfsd_sync() if a sync was done.
+ */
+static int wait_for_concurrent_writes(struct file *file)
+{
+       struct inode *inode = file->f_path.dentry->d_inode;
+       static ino_t last_ino;  /* inode number recorded by the previous call */
+       static dev_t last_dev;  /* s_dev recorded by the previous call */
+       int err = 0;            /* stays 0 unless nfsd_sync() is called */
+       /* Statics persist across calls and are updated without locking here. */
+       if (atomic_read(&inode->i_writecount) > 1
+           || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
+               dprintk("nfsd: write defer %d\n", task_pid_nr(current));
+               msleep(10);
+               dprintk("nfsd: write resume %d\n", task_pid_nr(current));
+       }
+       /* Sync only if the inode is still marked dirty at this point. */
+       if (inode->i_state & I_DIRTY) {
+               dprintk("nfsd: write sync %d\n", task_pid_nr(current));
+               err = nfsd_sync(file);
+       }
+       last_ino = inode->i_ino;        /* remember this file for the next call */
+       last_dev = inode->i_sb->s_dev;
+       return err;
+}
+
  static __be32
  nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
                                 loff_t offset, struct kvec *vec, int vlen,
@@ -1016,52 +1056,20 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
         oldfs = get_fs(); set_fs(KERNEL_DS);
         host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
         set_fs(oldfs);
-       if (host_err >= 0) {
-               *cnt = host_err;
-               nfsdstats.io_write += host_err;
-               fsnotify_modify(file->f_path.dentry);
-       }
+       if (host_err < 0)
+               goto out_nfserr;
+       *cnt = host_err;
+       nfsdstats.io_write += host_err;
+       fsnotify_modify(file->f_path.dentry);
  
         /* clear setuid/setgid flag after write */
-       if (host_err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID)))
+       if (inode->i_mode & (S_ISUID | S_ISGID))
                 kill_suid(dentry);
  
-       if (host_err >= 0 && stable) {
-               static ino_t    last_ino;
-               static dev_t    last_dev;
-
-               /*
-                * Gathered writes: If another process is currently
-                * writing to the file, there's a high chance
-                * this is another nfsd (triggered by a bulk write
-                * from a client's biod). Rather than syncing the
-                * file with each write request, we sleep for 10 msec.
-                *
-                * I don't know if this roughly approximates
-                * C. Juszak's idea of gathered writes, but it's a
-                * nice and simple solution (IMHO), and it seems to
-                * work:-)
-                */
-               if (use_wgather) {
-                       if (atomic_read(&inode->i_writecount) > 1
-                           || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
-                               dprintk("nfsd: write defer %d\n", task_pid_nr(current));
-                               msleep(10);
-                               dprintk("nfsd: write resume %d\n", task_pid_nr(current));
-                       }
-
-                       if (inode->i_state & I_DIRTY) {
-                               dprintk("nfsd: write sync %d\n", task_pid_nr(current));
-                               host_err=nfsd_sync(file);
-                       }
-#if 0
-                       wake_up(&inode->i_wait);
-#endif
-               }
-               last_ino = inode->i_ino;
-               last_dev = inode->i_sb->s_dev;
-       }
+       if (stable && use_wgather)
+               host_err = wait_for_concurrent_writes(file);
  
+out_nfserr:
         dprintk("nfsd: write complete host_err=%d\n", host_err);
         if (host_err >= 0)
                 err = 0;
@@ -2026,6 +2034,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
                                         struct dentry *dentry, int acc)
  {
         struct inode    *inode = dentry->d_inode;
+       struct path     path;
         int             err;
  
         if (acc == NFSD_MAY_NOP)
@@ -2098,7 +2107,17 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
         if (err == -EACCES && S_ISREG(inode->i_mode) &&
             acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE))
                 err = inode_permission(inode, MAY_EXEC);
+       if (err)
+               goto nfsd_out;
  
+       /* Do integrity (permission) checking now, but defer incrementing
+        * IMA counts to the actual file open.
+        */
+       path.mnt = exp->ex_path.mnt;
+       path.dentry = dentry;
+       err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC),
+                            IMA_COUNT_LEAVE);
+nfsd_out:
         return err? nfserrno(err) : 0;
  }