git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - net/unix/af_unix.c
next-20160111/aio
[karo-tx-linux.git] / net / unix / af_unix.c
index a1d5cf8fcd11833555e87e49c091f2e8779b3640..dd5b22a3fc91e54e403aa7829c79d0e45b0ccb0c 100644 (file)
@@ -451,7 +451,7 @@ static void unix_write_space(struct sock *sk)
        rcu_read_lock();
        if (unix_writable(sk)) {
                wq = rcu_dereference(sk->sk_wq);
-               if (wq_has_sleeper(wq))
+               if (skwq_has_sleeper(wq))
                        wake_up_interruptible_sync_poll(&wq->wait,
                                POLLOUT | POLLWRNORM | POLLWRBAND);
                sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
@@ -953,32 +953,20 @@ fail:
        return NULL;
 }
 
-static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
+static int unix_mknod(struct dentry *dentry, struct path *path, umode_t mode,
+                     struct path *res)
 {
-       struct dentry *dentry;
-       struct path path;
-       int err = 0;
-       /*
-        * Get the parent directory, calculate the hash for last
-        * component.
-        */
-       dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
-       err = PTR_ERR(dentry);
-       if (IS_ERR(dentry))
-               return err;
+       int err;
 
-       /*
-        * All right, let's create it.
-        */
-       err = security_path_mknod(&path, dentry, mode, 0);
+       err = security_path_mknod(path, dentry, mode, 0);
        if (!err) {
-               err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
+               err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0);
                if (!err) {
-                       res->mnt = mntget(path.mnt);
+                       res->mnt = mntget(path->mnt);
                        res->dentry = dget(dentry);
                }
        }
-       done_path_create(&path, dentry);
+
        return err;
 }
 
@@ -989,10 +977,12 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        struct unix_sock *u = unix_sk(sk);
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        char *sun_path = sunaddr->sun_path;
-       int err;
+       int err, name_err;
        unsigned int hash;
        struct unix_address *addr;
        struct hlist_head *list;
+       struct path path;
+       struct dentry *dentry;
 
        err = -EINVAL;
        if (sunaddr->sun_family != AF_UNIX)
@@ -1008,14 +998,34 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                goto out;
        addr_len = err;
 
+       name_err = 0;
+       dentry = NULL;
+       if (sun_path[0]) {
+               /* Get the parent directory, calculate the hash for last
+                * component.
+                */
+               dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
+
+               if (IS_ERR(dentry)) {
+                       /* delay report until after 'already bound' check */
+                       name_err = PTR_ERR(dentry);
+                       dentry = NULL;
+               }
+       }
+
        err = mutex_lock_interruptible(&u->readlock);
        if (err)
-               goto out;
+               goto out_path;
 
        err = -EINVAL;
        if (u->addr)
                goto out_up;
 
+       if (name_err) {
+               err = name_err == -EEXIST ? -EADDRINUSE : name_err;
+               goto out_up;
+       }
+
        err = -ENOMEM;
        addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
        if (!addr)
@@ -1026,11 +1036,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        addr->hash = hash ^ sk->sk_type;
        atomic_set(&addr->refcnt, 1);
 
-       if (sun_path[0]) {
-               struct path path;
+       if (dentry) {
+               struct path u_path;
                umode_t mode = S_IFSOCK |
                       (SOCK_INODE(sock)->i_mode & ~current_umask());
-               err = unix_mknod(sun_path, mode, &path);
+               err = unix_mknod(dentry, &path, mode, &u_path);
                if (err) {
                        if (err == -EEXIST)
                                err = -EADDRINUSE;
@@ -1038,9 +1048,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                        goto out_up;
                }
                addr->hash = UNIX_HASH_SIZE;
-               hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1);
+               hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
                spin_lock(&unix_table_lock);
-               u->path = path;
+               u->path = u_path;
                list = &unix_socket_table[hash];
        } else {
                spin_lock(&unix_table_lock);
@@ -1063,6 +1073,10 @@ out_unlock:
        spin_unlock(&unix_table_lock);
 out_up:
        mutex_unlock(&u->readlock);
+out_path:
+       if (dentry)
+               done_path_create(&path, dentry);
+
 out:
        return err;
 }
@@ -1482,7 +1496,7 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
        UNIXCB(skb).fp = NULL;
 
        for (i = scm->fp->count-1; i >= 0; i--)
-               unix_notinflight(scm->fp->fp[i]);
+               unix_notinflight(scm->fp->user, scm->fp->fp[i]);
 }
 
 static void unix_destruct_scm(struct sk_buff *skb)
@@ -1499,22 +1513,37 @@ static void unix_destruct_scm(struct sk_buff *skb)
        sock_wfree(skb);
 }
 
+/*
+ * The "user->unix_inflight" variable is protected by the garbage
+ * collection lock, and we just read it locklessly here. If you go
+ * over the limit, there might be a tiny race in actually noticing
+ * it across threads. Tough.
+ */
+static inline bool too_many_unix_fds(struct task_struct *p)
+{
+       struct user_struct *user = current_user();
+
+       if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
+               return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
+       return false;
+}
+
 #define MAX_RECURSION_LEVEL 4
 
 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 {
        int i;
        unsigned char max_level = 0;
-       int unix_sock_count = 0;
+
+       if (too_many_unix_fds(current))
+               return -ETOOMANYREFS;
 
        for (i = scm->fp->count - 1; i >= 0; i--) {
                struct sock *sk = unix_get_socket(scm->fp->fp[i]);
 
-               if (sk) {
-                       unix_sock_count++;
+               if (sk)
                        max_level = max(max_level,
                                        unix_sk(sk)->recursion_level);
-               }
        }
        if (unlikely(max_level > MAX_RECURSION_LEVEL))
                return -ETOOMANYREFS;
@@ -1528,10 +1557,8 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
        if (!UNIXCB(skb).fp)
                return -ENOMEM;
 
-       if (unix_sock_count) {
-               for (i = scm->fp->count - 1; i >= 0; i--)
-                       unix_inflight(scm->fp->fp[i]);
-       }
+       for (i = scm->fp->count - 1; i >= 0; i--)
+               unix_inflight(scm->fp->user, scm->fp->fp[i]);
        return max_level;
 }
 
@@ -2078,8 +2105,8 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
        struct scm_cookie scm;
        struct sock *sk = sock->sk;
        struct unix_sock *u = unix_sk(sk);
-       int noblock = flags & MSG_DONTWAIT;
-       struct sk_buff *skb;
+       struct sk_buff *skb, *last;
+       long timeo;
        int err;
        int peeked, skip;
 
@@ -2087,30 +2114,38 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
        if (flags&MSG_OOB)
                goto out;
 
-       err = mutex_lock_interruptible(&u->readlock);
-       if (unlikely(err)) {
-               /* recvmsg() in non blocking mode is supposed to return -EAGAIN
-                * sk_rcvtimeo is not honored by mutex_lock_interruptible()
-                */
-               err = noblock ? -EAGAIN : -ERESTARTSYS;
-               goto out;
-       }
+       timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 
-       skip = sk_peek_offset(sk, flags);
+       do {
+               mutex_lock(&u->readlock);
 
-       skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
-       if (!skb) {
+               skip = sk_peek_offset(sk, flags);
+               skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err,
+                                             &last);
+               if (skb)
+                       break;
+
+               mutex_unlock(&u->readlock);
+
+               if (err != -EAGAIN)
+                       break;
+       } while (timeo &&
+                !__skb_wait_for_more_packets(sk, &err, &timeo, last));
+
+       if (!skb) { /* implies readlock unlocked */
                unix_state_lock(sk);
                /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
                if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
                    (sk->sk_shutdown & RCV_SHUTDOWN))
                        err = 0;
                unix_state_unlock(sk);
-               goto out_unlock;
+               goto out;
        }
 
-       wake_up_interruptible_sync_poll(&u->peer_wait,
-                                       POLLOUT | POLLWRNORM | POLLWRBAND);
+       if (wq_has_sleeper(&u->peer_wait))
+               wake_up_interruptible_sync_poll(&u->peer_wait,
+                                               POLLOUT | POLLWRNORM |
+                                               POLLWRBAND);
 
        if (msg->msg_name)
                unix_copy_addr(msg, skb->sk);
@@ -2162,7 +2197,6 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
 
 out_free:
        skb_free_datagram(sk, skb);
-out_unlock:
        mutex_unlock(&u->readlock);
 out:
        return err;
@@ -2302,6 +2336,7 @@ again:
 
                        if (signal_pending(current)) {
                                err = sock_intr_errno(timeo);
+                               scm_destroy(&scm);
                                goto out;
                        }