diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 08b8b960a8edc9e1791c395192746acfc8cb1caa..032a96d78c99deda3b3298a305298f92776e2500 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -428,13 +428,16 @@ void tcp_init_sock(struct sock *sk)
 }
 EXPORT_SYMBOL(tcp_init_sock);
 
-static void tcp_tx_timestamp(struct sock *sk, struct sk_buff *skb)
+static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb)
 {
-       if (sk->sk_tsflags) {
+       if (tsflags) {
                struct skb_shared_info *shinfo = skb_shinfo(skb);
+               struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 
-               sock_tx_timestamp(sk, &shinfo->tx_flags);
-               if (shinfo->tx_flags & SKBTX_ANY_TSTAMP)
+               sock_tx_timestamp(sk, tsflags, &shinfo->tx_flags);
+               if (tsflags & SOF_TIMESTAMPING_TX_ACK)
+                       tcb->txstamp_ack = 1;
+               if (tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK)
                        shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1;
        }
 }
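
Note: with this hunk the timestamp flags come from the caller (per write) rather than only from sk->sk_tsflags, and SOF_TIMESTAMPING_TX_ACK additionally sets the new txstamp_ack bit on the skb. A minimal userspace sketch of driving this path through the control-message interface follows; it assumes a connected TCP socket "fd", that reporting flags such as SOF_TIMESTAMPING_SOFTWARE were already enabled with setsockopt(SO_TIMESTAMPING), and a kernel whose sock_cmsg_send() accepts SO_TIMESTAMPING cmsgs (see the tcp_sendmsg() hunk further down).

/* Hedged sketch: request an ACK timestamp for one particular write by
 * passing SO_TIMESTAMPING recording flags as a control message.  Only
 * the SOF_TIMESTAMPING_TX_RECORD_MASK bits may be passed this way; the
 * reporting flags still come from setsockopt().
 */
#include <string.h>
#include <sys/socket.h>
#include <linux/net_tstamp.h>

static ssize_t send_with_ack_tstamp(int fd, const void *buf, size_t len)
{
	struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
	union {
		char buf[CMSG_SPACE(sizeof(__u32))];
		struct cmsghdr align;	/* forces correct alignment */
	} control = { { 0 } };
	struct msghdr msg = {
		.msg_iov	= &iov,
		.msg_iovlen	= 1,
		.msg_control	= control.buf,
		.msg_controllen	= sizeof(control.buf),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
	__u32 tsflags = SOF_TIMESTAMPING_TX_ACK;

	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type	 = SO_TIMESTAMPING;
	cmsg->cmsg_len	 = CMSG_LEN(sizeof(tsflags));
	memcpy(CMSG_DATA(cmsg), &tsflags, sizeof(tsflags));

	return sendmsg(fd, &msg, 0);
}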
@@ -906,7 +909,8 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
                int copy, i;
                bool can_coalesce;
 
-               if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) {
+               if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0 ||
+                   !tcp_skb_can_collapse_to(skb)) {
 new_segment:
                        if (!sk_stream_memory_free(sk))
                                goto wait_for_sndbuf;
@@ -957,7 +961,7 @@ new_segment:
                offset += copy;
                size -= copy;
                if (!size) {
-                       tcp_tx_timestamp(sk, skb);
+                       tcp_tx_timestamp(sk, sk->sk_tsflags, skb);
                        goto out;
                }
 
@@ -1077,8 +1081,10 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *skb;
+       struct sockcm_cookie sockc;
        int flags, err, copied = 0;
        int mss_now = 0, size_goal, copied_syn = 0;
+       bool process_backlog = false;
        bool sg;
        long timeo;
 
@@ -1119,14 +1125,24 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
                /* 'common' sending to sendq */
        }
 
+       sockc.tsflags = sk->sk_tsflags;
+       if (msg->msg_controllen) {
+               err = sock_cmsg_send(sk, msg, &sockc);
+               if (unlikely(err)) {
+                       err = -EINVAL;
+                       goto out_err;
+               }
+       }
+
        /* This should be in poll */
        sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 
-       mss_now = tcp_send_mss(sk, &size_goal, flags);
-
        /* Ok commence sending. */
        copied = 0;
 
+restart:
+       mss_now = tcp_send_mss(sk, &size_goal, flags);
+
        err = -EPIPE;
        if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
                goto out_err;
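
Note: the control messages parsed by sock_cmsg_send() above land in sockc.tsflags and are handed to tcp_tx_timestamp() further down. The resulting timestamps are reported on the socket's error queue; a rough sketch of draining them is below. The exact meaning of ee_data (it carries the tskey) depends on SOF_TIMESTAMPING_OPT_ID and is treated as an assumption here.

/* Hedged sketch: read generated TX timestamps back from the error queue.
 * SCM_TIMESTAMPING carries the timestamps; the companion IP_RECVERR
 * message identifies the event (e.g. SCM_TSTAMP_ACK for ACK timestamps).
 */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/net_tstamp.h>
#include <linux/errqueue.h>

static void drain_tx_timestamps(int fd)
{
	char data[256], control[512];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov	= &iov,
		.msg_iovlen	= 1,
		.msg_control	= control,
		.msg_controllen	= sizeof(control),
	};
	struct cmsghdr *cm;

	if (recvmsg(fd, &msg, MSG_ERRQUEUE | MSG_DONTWAIT) < 0)
		return;

	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
		if (cm->cmsg_level == SOL_SOCKET &&
		    cm->cmsg_type == SCM_TIMESTAMPING) {
			struct scm_timestamping ts;

			memcpy(&ts, CMSG_DATA(cm), sizeof(ts));
			printf("sw tstamp %lld.%09ld\n",
			       (long long)ts.ts[0].tv_sec, ts.ts[0].tv_nsec);
		} else if (cm->cmsg_level == IPPROTO_IP &&
			   cm->cmsg_type == IP_RECVERR) {
			struct sock_extended_err err;

			memcpy(&err, CMSG_DATA(cm), sizeof(err));
			if (err.ee_origin == SO_EE_ORIGIN_TIMESTAMPING &&
			    err.ee_info == SCM_TSTAMP_ACK)
				printf("ACK timestamp, key %u\n", err.ee_data);
		}
	}
}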
@@ -1144,7 +1160,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
                        copy = max - skb->len;
                }
 
-               if (copy <= 0) {
+               if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
 new_segment:
                        /* Allocate new segment. If the interface is SG,
                         * allocate skb fitting to single page.
@@ -1152,6 +1168,10 @@ new_segment:
                        if (!sk_stream_memory_free(sk))
                                goto wait_for_sndbuf;
 
+                       if (process_backlog && sk_flush_backlog(sk)) {
+                               process_backlog = false;
+                               goto restart;
+                       }
                        skb = sk_stream_alloc_skb(sk,
                                                  select_size(sk, sg),
                                                  sk->sk_allocation,
@@ -1159,6 +1179,7 @@ new_segment:
                        if (!skb)
                                goto wait_for_memory;
 
+                       process_backlog = true;
                        /*
                         * Check whether we can use HW checksum.
                         */
@@ -1237,7 +1258,9 @@ new_segment:
 
                copied += copy;
                if (!msg_data_left(msg)) {
-                       tcp_tx_timestamp(sk, skb);
+                       tcp_tx_timestamp(sk, sockc.tsflags, skb);
+                       if (unlikely(flags & MSG_EOR))
+                               TCP_SKB_CB(skb)->eor = 1;
                        goto out;
                }
 
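
Note: MSG_EOR sets the new eor bit on the tail skb, which the tcp_skb_can_collapse_to() checks in the earlier hunks honour, so a later write starts a fresh skb instead of being appended to this one. A minimal, purely illustrative userspace use:

/* Hedged sketch: mark the end of an application-level record so that a
 * following write is not coalesced into the same skb.  Record framing
 * on the wire is of course still up to the application protocol.
 */
#include <sys/socket.h>

static ssize_t send_record(int fd, const void *hdr, size_t hlen,
			   const void *body, size_t blen)
{
	ssize_t n = send(fd, hdr, hlen, MSG_MORE);

	if (n < 0)
		return n;
	/* MSG_EOR: do not let TCP glue the next record onto this one. */
	return send(fd, body, blen, MSG_EOR);
}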
@@ -1431,14 +1454,10 @@ static void tcp_prequeue_process(struct sock *sk)
        struct sk_buff *skb;
        struct tcp_sock *tp = tcp_sk(sk);
 
-       NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPPREQUEUED);
+       NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUED);
 
-       /* RX process wants to run with disabled BHs, though it is not
-        * necessary */
-       local_bh_disable();
        while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
                sk_backlog_rcv(sk, skb);
-       local_bh_enable();
 
        /* Clear memory counter. */
        tp->ucopy.memory = 0;
@@ -1765,7 +1784,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 
                        chunk = len - tp->ucopy.len;
                        if (chunk != 0) {
-                               NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
+                               NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
                                len -= chunk;
                                copied += chunk;
                        }
@@ -1777,7 +1796,7 @@ do_prequeue:
 
                                chunk = len - tp->ucopy.len;
                                if (chunk != 0) {
-                                       NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
+                                       NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
                                        len -= chunk;
                                        copied += chunk;
                                }
@@ -1863,7 +1882,7 @@ skip_copy:
                        tcp_prequeue_process(sk);
 
                        if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
-                               NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
+                               NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
                                len -= chunk;
                                copied += chunk;
                        }
@@ -2053,13 +2072,13 @@ void tcp_close(struct sock *sk, long timeout)
                sk->sk_prot->disconnect(sk, 0);
        } else if (data_was_unread) {
                /* Unread data was tossed, zap the connection. */
-               NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
+               NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
                tcp_set_state(sk, TCP_CLOSE);
                tcp_send_active_reset(sk, sk->sk_allocation);
        } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
-               NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
+               NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
        } else if (tcp_close_state(sk)) {
                /* We FIN if the application ate all the data before
                 * zapping the connection.
@@ -2136,7 +2155,7 @@ adjudge_to_death:
                if (tp->linger2 < 0) {
                        tcp_set_state(sk, TCP_CLOSE);
                        tcp_send_active_reset(sk, GFP_ATOMIC);
-                       NET_INC_STATS_BH(sock_net(sk),
+                       __NET_INC_STATS(sock_net(sk),
                                        LINUX_MIB_TCPABORTONLINGER);
                } else {
                        const int tmo = tcp_fin_time(sk);
@@ -2155,7 +2174,7 @@ adjudge_to_death:
                if (tcp_check_oom(sk, 0)) {
                        tcp_set_state(sk, TCP_CLOSE);
                        tcp_send_active_reset(sk, GFP_ATOMIC);
-                       NET_INC_STATS_BH(sock_net(sk),
+                       __NET_INC_STATS(sock_net(sk),
                                        LINUX_MIB_TCPABORTONMEMORY);
                }
        }
@@ -2258,6 +2277,38 @@ static inline bool tcp_can_repair_sock(const struct sock *sk)
                ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED));
 }
 
+static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int len)
+{
+       struct tcp_repair_window opt;
+
+       if (!tp->repair)
+               return -EPERM;
+
+       if (len != sizeof(opt))
+               return -EINVAL;
+
+       if (copy_from_user(&opt, optbuf, sizeof(opt)))
+               return -EFAULT;
+
+       if (opt.max_window < opt.snd_wnd)
+               return -EINVAL;
+
+       if (after(opt.snd_wl1, tp->rcv_nxt + opt.rcv_wnd))
+               return -EINVAL;
+
+       if (after(opt.rcv_wup, tp->rcv_nxt))
+               return -EINVAL;
+
+       tp->snd_wl1     = opt.snd_wl1;
+       tp->snd_wnd     = opt.snd_wnd;
+       tp->max_window  = opt.max_window;
+
+       tp->rcv_wnd     = opt.rcv_wnd;
+       tp->rcv_wup     = opt.rcv_wup;
+
+       return 0;
+}
+
 static int tcp_repair_options_est(struct tcp_sock *tp,
                struct tcp_repair_opt __user *optbuf, unsigned int len)
 {
@@ -2585,6 +2636,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
                else
                        tp->tsoffset = val - tcp_time_stamp;
                break;
+       case TCP_REPAIR_WINDOW:
+               err = tcp_repair_set_window(tp, optval, optlen);
+               break;
        case TCP_NOTSENT_LOWAT:
                tp->notsent_lowat = val;
                sk->sk_write_space(sk);
@@ -2841,6 +2895,28 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
                        return -EINVAL;
                break;
 
+       case TCP_REPAIR_WINDOW: {
+               struct tcp_repair_window opt;
+
+               if (get_user(len, optlen))
+                       return -EFAULT;
+
+               if (len != sizeof(opt))
+                       return -EINVAL;
+
+               if (!tp->repair)
+                       return -EPERM;
+
+               opt.snd_wl1     = tp->snd_wl1;
+               opt.snd_wnd     = tp->snd_wnd;
+               opt.max_window  = tp->max_window;
+               opt.rcv_wnd     = tp->rcv_wnd;
+               opt.rcv_wup     = tp->rcv_wup;
+
+               if (copy_to_user(optval, &opt, len))
+                       return -EFAULT;
+               return 0;
+       }
        case TCP_QUEUE_SEQ:
                if (tp->repair_queue == TCP_SEND_QUEUE)
                        val = tp->write_seq;
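
Note: together with tcp_repair_set_window() above, this gives checkpoint/restore tools a way to save and re-establish window state. A rough userspace sketch follows; it assumes <linux/tcp.h> from a kernel carrying this patch (for TCP_REPAIR_WINDOW and struct tcp_repair_window) and a socket already switched into repair mode with TCP_REPAIR.

/* Hedged sketch: dump and restore window state with TCP_REPAIR_WINDOW.
 * Repair mode must already be enabled, roughly:
 *
 *     int on = 1;
 *     setsockopt(fd, IPPROTO_TCP, TCP_REPAIR, &on, sizeof(on));
 */
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tcp.h>

static int dump_window(int fd, struct tcp_repair_window *w)
{
	socklen_t len = sizeof(*w);

	/* Fails with EPERM unless tp->repair is set, and with EINVAL if
	 * the caller's structure size does not match the kernel's. */
	return getsockopt(fd, IPPROTO_TCP, TCP_REPAIR_WINDOW, w, &len);
}

static int restore_window(int fd, const struct tcp_repair_window *w)
{
	/* The kernel sanity-checks the values: snd_wnd must not exceed
	 * max_window, and snd_wl1/rcv_wup must be consistent with
	 * rcv_nxt (see tcp_repair_set_window() above). */
	return setsockopt(fd, IPPROTO_TCP, TCP_REPAIR_WINDOW, w, sizeof(*w));
}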
@@ -2950,8 +3026,18 @@ static void __tcp_alloc_md5sig_pool(void)
                return;
 
        for_each_possible_cpu(cpu) {
+               void *scratch = per_cpu(tcp_md5sig_pool, cpu).scratch;
                struct ahash_request *req;
 
+               if (!scratch) {
+                       scratch = kmalloc_node(sizeof(union tcp_md5sum_block) +
+                                              sizeof(struct tcphdr),
+                                              GFP_KERNEL,
+                                              cpu_to_node(cpu));
+                       if (!scratch)
+                               return;
+                       per_cpu(tcp_md5sig_pool, cpu).scratch = scratch;
+               }
                if (per_cpu(tcp_md5sig_pool, cpu).md5_req)
                        continue;
 
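
Note: the per-CPU scratch buffer is now allocated alongside the ahash request the first time the MD5 pool is set up, which typically happens when an application installs a TCP-MD5 (RFC 2385) key. For orientation only, a hedged sketch of that trigger; the tcpm_addr/tcpm_keylen/tcpm_key field names are taken from <linux/tcp.h>, and the exact padding/layout of struct tcp_md5sig varies between kernel versions.

/* Hedged sketch: install a TCP-MD5 key for a peer, which is what ends up
 * exercising tcp_alloc_md5sig_pool() and the scratch buffer added above.
 */
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tcp.h>

static int set_md5_key(int fd, const struct sockaddr_in *peer,
		       const void *key, unsigned int keylen)
{
	struct tcp_md5sig md5;

	if (keylen > TCP_MD5SIG_MAXKEYLEN)
		return -1;

	memset(&md5, 0, sizeof(md5));
	memcpy(&md5.tcpm_addr, peer, sizeof(*peer));
	md5.tcpm_keylen = keylen;
	memcpy(md5.tcpm_key, key, keylen);

	return setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
}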
@@ -3079,7 +3165,7 @@ void tcp_done(struct sock *sk)
        struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
 
        if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
-               TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
+               TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
 
        tcp_set_state(sk, TCP_CLOSE);
        tcp_clear_xmit_timers(sk);