tcp_fastopen - INTEGER
Enable TCP Fast Open feature (draft-ietf-tcpm-fastopen) to send data
in the opening SYN packet. To use this feature, the client application
- must use sendmsg() or sendto() with MSG_FASTOPEN flag rather than
- connect() to perform a TCP handshake automatically.
-
- The values (bitmap) are
- 1: Enables sending data in the opening SYN on the client.
- 2: Enables TCP Fast Open on the server side, i.e., allowing data in
- a SYN packet to be accepted and passed to the application before
- 3-way hand shake finishes.
- 4: Send data in the opening SYN regardless of cookie availability and
- without a cookie option.
- 0x100: Accept SYN data w/o validating the cookie.
- 0x200: Accept data-in-SYN w/o any cookie option present.
- 0x400/0x800: Enable Fast Open on all listeners regardless of the
- TCP_FASTOPEN socket option. The two different flags designate two
- different ways of setting max_qlen without the TCP_FASTOPEN socket
- option.
+ must not use connect(). Instead, it should use sendmsg() or sendto()
+ with the MSG_FASTOPEN flag, which performs the TCP handshake
+ automatically.
- Default: 0
-
- Note that the client & server side Fast Open flags (1 and 2
- respectively) must be also enabled before the rest of flags can take
- effect.
+ The values (bitmap) are:
+ 1: Enables sending data in the opening SYN on the client.
+ 5: Enables sending data in the opening SYN on the client regardless
+ of cookie availability (bit 1 combined with bit 4, which allows
+ sending data without a Fast Open cookie).
- See include/net/tcp.h and the code for more details.
+ Default: 0
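+
+ For illustration, a minimal client sketch (error handling omitted;
+ MSG_FASTOPEN is assumed to be exposed by the system headers; the
+ function name, address, and payload below are placeholders):
+
+    #include <arpa/inet.h>
+    #include <netinet/in.h>
+    #include <sys/socket.h>
+
+    int tfo_client(void)
+    {
+            static const char req[] = "GET / HTTP/1.0\r\n\r\n";
+            struct sockaddr_in srv = {
+                    .sin_family = AF_INET,
+                    .sin_port   = htons(80),
+            };
+            int fd = socket(AF_INET, SOCK_STREAM, 0);
+
+            inet_pton(AF_INET, "192.0.2.1", &srv.sin_addr);
+            /* No connect(): sendto() performs the handshake, and the
+             * payload rides in the SYN (with a cookie once one has
+             * been cached from an earlier attempt).
+             */
+            return sendto(fd, req, sizeof(req) - 1, MSG_FASTOPEN,
+                          (struct sockaddr *)&srv, sizeof(srv));
+    }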
tcp_syn_retries - INTEGER
Number of times initial SYNs for an active TCP connection attempt
LINUX_MIB_TCPCHALLENGEACK, /* TCPChallengeACK */
LINUX_MIB_TCPSYNCHALLENGE, /* TCPSYNChallenge */
LINUX_MIB_TCPFASTOPENACTIVE, /* TCPFastOpenActive */
- LINUX_MIB_TCPFASTOPENPASSIVE, /* TCPFastOpenPassive*/
- LINUX_MIB_TCPFASTOPENPASSIVEFAIL, /* TCPFastOpenPassiveFail */
- LINUX_MIB_TCPFASTOPENLISTENOVERFLOW, /* TCPFastOpenListenOverflow */
- LINUX_MIB_TCPFASTOPENCOOKIEREQD, /* TCPFastOpenCookieReqd */
__LINUX_MIB_MAX
};
#define TCP_REPAIR_QUEUE 20
#define TCP_QUEUE_SEQ 21
#define TCP_REPAIR_OPTIONS 22
-#define TCP_FASTOPEN 23 /* Enable FastOpen on listeners */
struct tcp_repair_opt {
__u32 opt_code;
/* TCP Fast Open */
#define TCP_FASTOPEN_COOKIE_MIN 4 /* Min Fast Open Cookie size in bytes */
#define TCP_FASTOPEN_COOKIE_MAX 16 /* Max Fast Open Cookie size in bytes */
-#define TCP_FASTOPEN_COOKIE_SIZE 8 /* the size employed by this impl. */
/* TCP Fast Open Cookie as stored in memory */
struct tcp_fastopen_cookie {
/* Only used by TCP MD5 Signature so far. */
const struct tcp_request_sock_ops *af_specific;
#endif
- struct sock *listener; /* needed for TFO */
u32 rcv_isn;
u32 snt_isn;
u32 snt_synack; /* synack sent time */
- u32 rcv_nxt; /* the ack # by SYNACK. For
- * FastOpen it's the seq#
- * after data-in-SYN.
- */
};
static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
struct tcp_md5sig_info __rcu *md5sig_info;
#endif
+/* TCP fastopen related information */
+ struct tcp_fastopen_request *fastopen_req;
+
/* When the cookie options are generated and exchanged, then this
* object holds a reference to them (cookie_values->kref). Also
* contains related tcp_cookie_transactions fields.
*/
struct tcp_cookie_values *cookie_values;
-
-/* TCP fastopen related information */
- struct tcp_fastopen_request *fastopen_req;
- /* fastopen_rsk points to request_sock that resulted in this big
- * socket. Used to retransmit SYNACKs etc.
- */
- struct request_sock *fastopen_rsk;
};
enum tsq_flags {
return (struct tcp_timewait_sock *)sk;
}
-static inline bool tcp_passive_fastopen(const struct sock *sk)
-{
- return (sk->sk_state == TCP_SYN_RECV &&
- tcp_sk(sk)->fastopen_rsk != NULL);
-}
-
-static inline bool fastopen_cookie_present(struct tcp_fastopen_cookie *foc)
-{
- return foc->len != -1;
-}
-
-static inline int fastopen_init_queue(struct sock *sk, int backlog)
-{
- struct request_sock_queue *queue =
- &inet_csk(sk)->icsk_accept_queue;
-
- if (queue->fastopenq == NULL) {
- queue->fastopenq = kzalloc(
- sizeof(struct fastopen_queue),
- sk->sk_allocation);
- if (queue->fastopenq == NULL)
- return -ENOMEM;
- spin_lock_init(&queue->fastopenq->lock);
- }
- queue->fastopenq->max_qlen = backlog;
- return 0;
-}
-
#endif /* __KERNEL__ */
#endif /* _LINUX_TCP_H */
struct request_sock *syn_table[0];
};
-/*
- * For a TCP Fast Open listener -
- * lock - protects the access to all the reqsk, which is co-owned by
- * the listener and the child socket.
- * qlen - pending TFO requests (still in TCP_SYN_RECV).
- * max_qlen - max TFO reqs allowed before TFO is disabled.
- *
- * XXX (TFO) - ideally these fields can be made as part of "listen_sock"
- * structure above. But there is some implementation difficulty due to
- * listen_sock being part of request_sock_queue hence will be freed when
- * a listener is stopped. But TFO related fields may continue to be
- * accessed even after a listener is closed, until its sk_refcnt drops
- * to 0 implying no more outstanding TFO reqs. One solution is to keep
- * listen_opt around until sk_refcnt drops to 0. But there is some other
- * complexity that needs to be resolved. E.g., a listener can be disabled
- * temporarily through shutdown()->tcp_disconnect(), and re-enabled later.
- */
-struct fastopen_queue {
- struct request_sock *rskq_rst_head; /* Keep track of past TFO */
- struct request_sock *rskq_rst_tail; /* requests that caused RST.
- * This is part of the defense
- * against spoofing attack.
- */
- spinlock_t lock;
- int qlen; /* # of pending (TCP_SYN_RECV) reqs */
- int max_qlen; /* != 0 iff TFO is currently enabled */
-};
-
/** struct request_sock_queue - queue of request_socks
*
* @rskq_accept_head - FIFO head of established children
u8 rskq_defer_accept;
/* 3 bytes hole, try to pack */
struct listen_sock *listen_opt;
- struct fastopen_queue *fastopenq; /* This is non-NULL iff TFO has been
- * enabled on this listener. Check
- * max_qlen != 0 in fastopen_queue
- * to determine if TFO is enabled
- * right at this moment.
- */
};
extern int reqsk_queue_alloc(struct request_sock_queue *queue,
extern void __reqsk_queue_destroy(struct request_sock_queue *queue);
extern void reqsk_queue_destroy(struct request_sock_queue *queue);
-extern void reqsk_fastopen_remove(struct sock *sk,
- struct request_sock *req, bool reset);
static inline struct request_sock *
reqsk_queue_yank_acceptq(struct request_sock_queue *queue)
return req;
}
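+/* Pop the next established request from the accept queue, adjust the
+ * parent's accept-queue accounting, free the request itself, and return
+ * the child socket it carried.
+ */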
+static inline struct sock *reqsk_queue_get_child(struct request_sock_queue *queue,
+ struct sock *parent)
+{
+ struct request_sock *req = reqsk_queue_remove(queue);
+ struct sock *child = req->sk;
+
+ WARN_ON(child == NULL);
+
+ sk_acceptq_removed(parent);
+ __reqsk_free(req);
+ return child;
+}
+
static inline int reqsk_queue_removed(struct request_sock_queue *queue,
struct request_sock *req)
{
/* Bit Flags for sysctl_tcp_fastopen */
#define TFO_CLIENT_ENABLE 1
-#define TFO_SERVER_ENABLE 2
#define TFO_CLIENT_NO_COOKIE 4 /* Data in SYN w/o cookie option */
-/* Process SYN data but skip cookie validation */
-#define TFO_SERVER_COOKIE_NOT_CHKED 0x100
-/* Accept SYN data w/o any cookie option */
-#define TFO_SERVER_COOKIE_NOT_REQD 0x200
-
-/* Force enable TFO on all listeners, i.e., not requiring the
- * TCP_FASTOPEN socket option. SOCKOPT1/2 determine how to set max_qlen.
- */
-#define TFO_SERVER_WO_SOCKOPT1 0x400
-#define TFO_SERVER_WO_SOCKOPT2 0x800
-/* Always create TFO child sockets on a TFO listener even when
- * cookie/data not present. (For testing purpose!)
- */
-#define TFO_SERVER_ALWAYS 0x1000
-
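+/* Illustrative only, not part of this patch or the kernel API: a sketch
+ * of how a caller might consult the remaining client-side bits before
+ * attempting data-in-SYN. The helper name and the cookie_cached flag
+ * are hypothetical.
+ */
+static inline bool tfo_client_allowed(bool cookie_cached)
+{
+	if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE))
+		return false;	/* Fast Open disabled on the client */
+	/* Without a cached cookie, send data in the SYN only if the
+	 * no-cookie bit (TFO_CLIENT_NO_COOKIE) is also set.
+	 */
+	return cookie_cached || (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE);
+}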
extern struct inet_timewait_death_row tcp_death_row;
/* sysctl variables for tcp */
const struct tcphdr *th);
extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb,
struct request_sock *req,
- struct request_sock **prev,
- bool fastopen);
+ struct request_sock **prev);
extern int tcp_child_process(struct sock *parent, struct sock *child,
struct sk_buff *skb);
extern bool tcp_use_frto(struct sock *sk);
extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check);
extern bool tcp_remember_stamp(struct sock *sk);
extern bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw);
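+/* Fast Open client state (cookie, MSS, SYN-loss history) is cached per
+ * destination together with the other TCP metrics.
+ */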
+extern void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
+ struct tcp_fastopen_cookie *cookie,
+ int *syn_loss, unsigned long *last_syn_loss);
+extern void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
+ struct tcp_fastopen_cookie *cookie,
+ bool syn_lost);
extern void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst);
extern void tcp_disable_fack(struct tcp_sock *tp);
extern void tcp_close(struct sock *sk, long timeout);
extern int tcp_connect(struct sock *sk);
extern struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
struct request_sock *req,
- struct request_values *rvp,
- struct tcp_fastopen_cookie *foc);
+ struct request_values *rvp);
extern int tcp_disconnect(struct sock *sk, int flags);
void tcp_connect_init(struct sock *sk);
extern void tcp_cwnd_application_limited(struct sock *sk);
extern void tcp_resume_early_retransmit(struct sock *sk);
extern void tcp_rearm_rto(struct sock *sk);
-extern void tcp_reset(struct sock *sk);
/* tcp_timer.c */
extern void tcp_init_xmit_timers(struct sock *);
extern int tcp_mss_to_mtu(struct sock *sk, int mss);
extern void tcp_mtup_init(struct sock *sk);
extern void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt);
-extern void tcp_init_buffer_space(struct sock *sk);
static inline void tcp_bound_rto(const struct sock *sk)
{
req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */
req->cookie_ts = 0;
tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
- tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
req->mss = rx_opt->mss_clamp;
req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
ireq->tstamp_ok = rx_opt->tstamp_ok;
extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
const struct tcp_md5sig_key *key);
-/* From tcp_fastopen.c */
-extern void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
- struct tcp_fastopen_cookie *cookie,
- int *syn_loss, unsigned long *last_syn_loss);
-extern void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
- struct tcp_fastopen_cookie *cookie,
- bool syn_lost);
struct tcp_fastopen_request {
/* Fast Open cookie. Size 0 means a cookie request */
struct tcp_fastopen_cookie cookie;
struct msghdr *data; /* data in MSG_FASTOPEN */
u16 copied; /* queued in tcp_connect() */
};
-void tcp_free_fastopen_req(struct tcp_sock *tp);
-
-extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
-int tcp_fastopen_reset_cipher(void *key, unsigned int len);
-void tcp_fastopen_cookie_gen(__be32 addr, struct tcp_fastopen_cookie *foc);
-
-#define TCP_FASTOPEN_KEY_LENGTH 16
-/* Fastopen key context */
-struct tcp_fastopen_context {
- struct crypto_cipher __rcu *tfm;
- __u8 key[TCP_FASTOPEN_KEY_LENGTH];
- struct rcu_head rcu;
-};
+void tcp_free_fastopen_req(struct tcp_sock *tp);
/* write queue abstraction */
static inline void tcp_write_queue_purge(struct sock *sk)
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/string.h>
-#include <linux/tcp.h>
#include <linux/vmalloc.h>
#include <net/request_sock.h>
kfree(lopt);
}
-/*
- * This function is called to set a Fast Open socket's "fastopen_rsk" field
- * to NULL when a TFO socket no longer needs to access the request_sock.
- * This happens only after 3WHS has been either completed or aborted (e.g.,
- * RST is received).
- *
- * Before TFO, a child socket is created only after 3WHS is completed,
- * hence it never needs to access the request_sock. things get a lot more
- * complex with TFO. A child socket, accepted or not, has to access its
- * request_sock for 3WHS processing, e.g., to retransmit SYN-ACK pkts,
- * until 3WHS is either completed or aborted. Afterwards the req will stay
- * until either the child socket is accepted, or in the rare case when the
- * listener is closed before the child is accepted.
- *
- * In short, a request socket is only freed after BOTH 3WHS has completed
- * (or aborted) and the child socket has been accepted (or listener closed).
- * When a child socket is accepted, its corresponding req->sk is set to
- * NULL since it's no longer needed. More importantly, "req->sk == NULL"
- * will be used by the code below to determine if a child socket has been
- * accepted or not, and the check is protected by the fastopenq->lock
- * described below.
- *
- * Note that fastopen_rsk is only accessed from the child socket's context
- * with its socket lock held. But a request_sock (req) can be accessed by
- * both its child socket through fastopen_rsk, and a listener socket through
- * icsk_accept_queue.rskq_accept_head. To protect the access a simple spin
- * lock per listener "icsk->icsk_accept_queue.fastopenq->lock" is created.
- * only in the rare case when both the listener and the child locks are held,
- * e.g., in inet_csk_listen_stop() do we not need to acquire the lock.
- * The lock also protects other fields such as fastopenq->qlen, which is
- * decremented by this function when fastopen_rsk is no longer needed.
- *
- * Note that another solution was to simply use the existing socket lock
- * from the listener. But first socket lock is difficult to use. It is not
- * a simple spin lock - one must consider sock_owned_by_user() and arrange
- * to use sk_add_backlog() stuff. But what really makes it infeasible is the
- * locking hierarchy violation. E.g., inet_csk_listen_stop() may try to
- * acquire a child's lock while holding listener's socket lock. A corner
- * case might also exist in tcp_v4_hnd_req() that will trigger this locking
- * order.
- *
- * When a TFO req is created, it needs to sock_hold its listener to prevent
- * the latter data structure from going away.
- *
- * This function also sets "treq->listener" to NULL and unreference listener
- * socket. treq->listener is used by the listener so it is protected by the
- * fastopenq->lock in this function.
- */
-void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
- bool reset)
-{
- struct sock *lsk = tcp_rsk(req)->listener;
- struct fastopen_queue *fastopenq =
- inet_csk(lsk)->icsk_accept_queue.fastopenq;
-
- BUG_ON(!spin_is_locked(&sk->sk_lock.slock) && !sock_owned_by_user(sk));
-
- tcp_sk(sk)->fastopen_rsk = NULL;
- spin_lock_bh(&fastopenq->lock);
- fastopenq->qlen--;
- tcp_rsk(req)->listener = NULL;
- if (req->sk) /* the child socket hasn't been accepted yet */
- goto out;
-
- if (!reset || lsk->sk_state != TCP_LISTEN) {
- /* If the listener has been closed don't bother with the
- * special RST handling below.
- */
- spin_unlock_bh(&fastopenq->lock);
- sock_put(lsk);
- reqsk_free(req);
- return;
- }
- /* Wait for 60secs before removing a req that has triggered RST.
- * This is a simple defense against TFO spoofing attack - by
- * counting the req against fastopen.max_qlen, and disabling
- * TFO when the qlen exceeds max_qlen.
- *
- * For more details see CoNext'11 "TCP Fast Open" paper.
- */
- req->expires = jiffies + 60*HZ;
- if (fastopenq->rskq_rst_head == NULL)
- fastopenq->rskq_rst_head = req;
- else
- fastopenq->rskq_rst_tail->dl_next = req;
-
- req->dl_next = NULL;
- fastopenq->rskq_rst_tail = req;
- fastopenq->qlen++;
-out:
- spin_unlock_bh(&fastopenq->lock);
- sock_put(lsk);
- return;
-}
pr_err("Attempt to release alive inet socket %p\n", sk);
return;
}
- if (sk->sk_type == SOCK_STREAM) {
- struct fastopen_queue *fastopenq =
- inet_csk(sk)->icsk_accept_queue.fastopenq;
- kfree(fastopenq);
- }
WARN_ON(atomic_read(&sk->sk_rmem_alloc));
WARN_ON(atomic_read(&sk->sk_wmem_alloc));
* we can only allow the backlog to be adjusted.
*/
if (old_state != TCP_LISTEN) {
- /* Check special setups for testing purpose to enable TFO w/o
- * requiring TCP_FASTOPEN sockopt.
- * Note that only TCP sockets (SOCK_STREAM) will reach here.
- * Also fastopenq may already been allocated because this
- * socket was in TCP_LISTEN state previously but was
- * shutdown() (rather than close()).
- */
- if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 &&
- inet_csk(sk)->icsk_accept_queue.fastopenq == NULL) {
- if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0)
- err = fastopen_init_queue(sk, backlog);
- else if ((sysctl_tcp_fastopen &
- TFO_SERVER_WO_SOCKOPT2) != 0)
- err = fastopen_init_queue(sk,
- ((uint)sysctl_tcp_fastopen) >> 16);
- else
- err = 0;
- if (err)
- goto out;
- }
err = inet_csk_listen_start(sk, backlog);
if (err)
goto out;
sock_rps_record_flow(sk2);
WARN_ON(!((1 << sk2->sk_state) &
- (TCPF_ESTABLISHED | TCPF_SYN_RECV |
- TCPF_CLOSE_WAIT | TCPF_CLOSE)));
+ (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE)));
sock_graft(sk2, newsock);
struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- struct request_sock_queue *queue = &icsk->icsk_accept_queue;
struct sock *newsk;
- struct request_sock *req;
int error;
lock_sock(sk);
goto out_err;
/* Find already established connection */
- if (reqsk_queue_empty(queue)) {
+ if (reqsk_queue_empty(&icsk->icsk_accept_queue)) {
long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
/* If this is a non blocking socket don't sleep */
if (error)
goto out_err;
}
- req = reqsk_queue_remove(queue);
- newsk = req->sk;
-
- sk_acceptq_removed(sk);
- if (sk->sk_type == SOCK_STREAM && queue->fastopenq != NULL) {
- spin_lock_bh(&queue->fastopenq->lock);
- if (tcp_rsk(req)->listener) {
- /* We are still waiting for the final ACK from 3WHS
- * so can't free req now. Instead, we set req->sk to
- * NULL to signify that the child socket is taken
- * so reqsk_fastopen_remove() will free the req
- * when 3WHS finishes (or is aborted).
- */
- req->sk = NULL;
- req = NULL;
- }
- spin_unlock_bh(&queue->fastopenq->lock);
- }
+
+ newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk);
+ WARN_ON(newsk->sk_state == TCP_SYN_RECV);
out:
release_sock(sk);
- if (req)
- __reqsk_free(req);
return newsk;
out_err:
newsk = NULL;
- req = NULL;
*err = error;
goto out;
}
void inet_csk_listen_stop(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- struct request_sock_queue *queue = &icsk->icsk_accept_queue;
struct request_sock *acc_req;
struct request_sock *req;
inet_csk_delete_keepalive_timer(sk);
/* make all the listen_opt local to us */
- acc_req = reqsk_queue_yank_acceptq(queue);
+ acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue);
/* Following specs, it would be better either to send FIN
* (and enter FIN-WAIT-1, it is normal close)
* To be honest, we are not able to make either
* of the variants now. --ANK
*/
- reqsk_queue_destroy(queue);
+ reqsk_queue_destroy(&icsk->icsk_accept_queue);
while ((req = acc_req) != NULL) {
struct sock *child = req->sk;
percpu_counter_inc(sk->sk_prot->orphan_count);
- if (sk->sk_type == SOCK_STREAM && tcp_rsk(req)->listener) {
- BUG_ON(tcp_sk(child)->fastopen_rsk != req);
- BUG_ON(sk != tcp_rsk(req)->listener);
-
- /* Paranoid, to prevent race condition if
- * an inbound pkt destined for child is
- * blocked by sock lock in tcp_v4_rcv().
- * Also to satisfy an assertion in
- * tcp_v4_destroy_sock().
- */
- tcp_sk(child)->fastopen_rsk = NULL;
- sock_put(sk);
- }
inet_csk_destroy_sock(child);
bh_unlock_sock(child);
sk_acceptq_removed(sk);
__reqsk_free(req);
}
- if (queue->fastopenq != NULL) {
- /* Free all the reqs queued in rskq_rst_head. */
- spin_lock_bh(&queue->fastopenq->lock);
- acc_req = queue->fastopenq->rskq_rst_head;
- queue->fastopenq->rskq_rst_head = NULL;
- spin_unlock_bh(&queue->fastopenq->lock);
- while ((req = acc_req) != NULL) {
- acc_req = req->dl_next;
- __reqsk_free(req);
- }
- }
WARN_ON(sk->sk_ack_backlog);
}
EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
SNMP_MIB_ITEM("TCPChallengeACK", LINUX_MIB_TCPCHALLENGEACK),
SNMP_MIB_ITEM("TCPSYNChallenge", LINUX_MIB_TCPSYNCHALLENGE),
SNMP_MIB_ITEM("TCPFastOpenActive", LINUX_MIB_TCPFASTOPENACTIVE),
- SNMP_MIB_ITEM("TCPFastOpenPassive", LINUX_MIB_TCPFASTOPENPASSIVE),
- SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL),
- SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW),
- SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD),
SNMP_MIB_SENTINEL
};
ireq->tstamp_ok = tcp_opt.saw_tstamp;
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
- treq->listener = NULL;
/* We throwed the options of the initial SYN away, so we hope
* the ACK carries the same options again (see RFC1122 4.2.3.8)
return 0;
}
-int proc_tcp_fastopen_key(ctl_table *ctl, int write, void __user *buffer,
- size_t *lenp, loff_t *ppos)
-{
- ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
- struct tcp_fastopen_context *ctxt;
- int ret;
- u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */
-
- tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
- if (!tbl.data)
- return -ENOMEM;
-
- rcu_read_lock();
- ctxt = rcu_dereference(tcp_fastopen_ctx);
- if (ctxt)
- memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
- rcu_read_unlock();
-
- snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x",
- user_key[0], user_key[1], user_key[2], user_key[3]);
- ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
-
- if (write && ret == 0) {
- if (sscanf(tbl.data, "%x-%x-%x-%x", user_key, user_key + 1,
- user_key + 2, user_key + 3) != 4) {
- ret = -EINVAL;
- goto bad_key;
- }
- tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
- }
-
-bad_key:
- pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
- user_key[0], user_key[1], user_key[2], user_key[3],
- (char *)tbl.data, ret);
- kfree(tbl.data);
- return ret;
-}
-
static struct ctl_table ipv4_table[] = {
{
.procname = "tcp_timestamps",
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
- .procname = "tcp_fastopen_key",
- .mode = 0600,
- .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
- .proc_handler = proc_tcp_fastopen_key,
- },
{
.procname = "tcp_tw_recycle",
.data = &tcp_death_row.sysctl_tw_recycle,
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= POLLIN | POLLRDNORM | POLLRDHUP;
- /* Connected or passive Fast Open socket? */
- if (sk->sk_state != TCP_SYN_SENT &&
- (sk->sk_state != TCP_SYN_RECV || tp->fastopen_rsk != NULL)) {
+ /* Connected? */
+ if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) {
int target = sock_rcvlowat(sk, 0, INT_MAX);
if (tp->urg_seq == tp->copied_seq &&
ssize_t copied;
long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
- /* Wait for a connection to finish. One exception is TCP Fast Open
- * (passive side) where data is allowed to be sent before a connection
- * is fully established.
- */
- if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
- !tcp_passive_fastopen(sk)) {
+ /* Wait for a connection to finish. */
+ if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
goto out_err;
- }
clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
- /* Wait for a connection to finish. One exception is TCP Fast Open
- * (passive side) where data is allowed to be sent before a connection
- * is fully established.
- */
- if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
- !tcp_passive_fastopen(sk)) {
+ /* Wait for a connection to finish. */
+ if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
goto do_error;
- }
if (unlikely(tp->repair)) {
if (tp->repair_queue == TCP_RECV_QUEUE) {
* they look as CLOSING or LAST_ACK for Linux)
* Probably, I missed some more holelets.
* --ANK
- * XXX (TFO) - To start off we don't support SYN+ACK+FIN
- * in a single packet! (May consider it later but will
- * probably need API support or TCP_CORK SYN-ACK until
- * data is written and socket is closed.)
*/
tcp_send_fin(sk);
}
}
}
- if (sk->sk_state == TCP_CLOSE) {
- struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
- /* We could get here with a non-NULL req if the socket is
- * aborted (e.g., closed with unread data) before 3WHS
- * finishes.
- */
- if (req != NULL)
- reqsk_fastopen_remove(sk, req, false);
+ if (sk->sk_state == TCP_CLOSE)
inet_csk_destroy_sock(sk);
- }
/* Otherwise, socket is reprieved until protocol close. */
out:
else
icsk->icsk_user_timeout = msecs_to_jiffies(val);
break;
-
- case TCP_FASTOPEN:
- if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
- TCPF_LISTEN)))
- err = fastopen_init_queue(sk, val);
- else
- err = -EINVAL;
- break;
default:
err = -ENOPROTOOPT;
break;
void tcp_done(struct sock *sk)
{
- struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
-
if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
tcp_set_state(sk, TCP_CLOSE);
tcp_clear_xmit_timers(sk);
- if (req != NULL)
- reqsk_fastopen_remove(sk, req, false);
sk->sk_shutdown = SHUTDOWN_MASK;
-#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/tcp.h>
-#include <linux/rcupdate.h>
-#include <linux/rculist.h>
-#include <net/inetpeer.h>
-#include <net/tcp.h>
-int sysctl_tcp_fastopen __read_mostly;
-
-struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
-
-static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock);
-
-static void tcp_fastopen_ctx_free(struct rcu_head *head)
-{
- struct tcp_fastopen_context *ctx =
- container_of(head, struct tcp_fastopen_context, rcu);
- crypto_free_cipher(ctx->tfm);
- kfree(ctx);
-}
-
-int tcp_fastopen_reset_cipher(void *key, unsigned int len)
-{
- int err;
- struct tcp_fastopen_context *ctx, *octx;
-
- ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
- if (!ctx)
- return -ENOMEM;
- ctx->tfm = crypto_alloc_cipher("aes", 0, 0);
-
- if (IS_ERR(ctx->tfm)) {
- err = PTR_ERR(ctx->tfm);
-error: kfree(ctx);
- pr_err("TCP: TFO aes cipher alloc error: %d\n", err);
- return err;
- }
- err = crypto_cipher_setkey(ctx->tfm, key, len);
- if (err) {
- pr_err("TCP: TFO cipher key error: %d\n", err);
- crypto_free_cipher(ctx->tfm);
- goto error;
- }
- memcpy(ctx->key, key, len);
-
- spin_lock(&tcp_fastopen_ctx_lock);
-
- octx = rcu_dereference_protected(tcp_fastopen_ctx,
- lockdep_is_held(&tcp_fastopen_ctx_lock));
- rcu_assign_pointer(tcp_fastopen_ctx, ctx);
- spin_unlock(&tcp_fastopen_ctx_lock);
-
- if (octx)
- call_rcu(&octx->rcu, tcp_fastopen_ctx_free);
- return err;
-}
-
-/* Computes the fastopen cookie for the peer.
- * The peer address is a 128 bits long (pad with zeros for IPv4).
- *
- * The caller must check foc->len to determine if a valid cookie
- * has been generated successfully.
-*/
-void tcp_fastopen_cookie_gen(__be32 addr, struct tcp_fastopen_cookie *foc)
-{
- __be32 peer_addr[4] = { addr, 0, 0, 0 };
- struct tcp_fastopen_context *ctx;
-
- rcu_read_lock();
- ctx = rcu_dereference(tcp_fastopen_ctx);
- if (ctx) {
- crypto_cipher_encrypt_one(ctx->tfm,
- foc->val,
- (__u8 *)peer_addr);
- foc->len = TCP_FASTOPEN_COOKIE_SIZE;
- }
- rcu_read_unlock();
-}
+int sysctl_tcp_fastopen;
static int __init tcp_fastopen_init(void)
{
- __u8 key[TCP_FASTOPEN_KEY_LENGTH];
-
- get_random_bytes(key, sizeof(key));
- tcp_fastopen_reset_cipher(key, sizeof(key));
return 0;
}
/* 4. Try to fixup all. It is made immediately after connection enters
* established state.
*/
-void tcp_init_buffer_space(struct sock *sk)
+static void tcp_init_buffer_space(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
int maxwin;
{
struct tcp_sock *tp = tcp_sk(sk);
- /* If the retrans timer is currently being used by Fast Open
- * for SYN-ACK retrans purpose, stay put.
- */
- if (tp->fastopen_rsk)
- return;
-
if (!tp->packets_out) {
inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
} else {
}
/* When we get a reset we do this. */
-void tcp_reset(struct sock *sk)
+static void tcp_reset(struct sock *sk)
{
/* We want the right error as BSD sees it (and indeed as we do). */
switch (sk->sk_state) {
tcp_send_synack(sk);
#if 0
/* Note, we could accept data and URG from this segment.
- * There are no obstacles to make this (except that we must
- * either change tcp_recvmsg() to prevent it from returning data
- * before 3WHS completes per RFC793, or employ TCP Fast Open).
+ * There are no obstacles to make this.
*
* However, if we ignore data in ACKless segments sometimes,
* we have no reasons to accept it sometimes.
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
- struct request_sock *req;
int queued = 0;
tp->rx_opt.saw_tstamp = 0;
return 0;
}
- req = tp->fastopen_rsk;
- if (req != NULL) {
- BUG_ON(sk->sk_state != TCP_SYN_RECV &&
- sk->sk_state != TCP_FIN_WAIT1);
-
- if (tcp_check_req(sk, skb, req, NULL, true) == NULL)
- goto discard;
- } else if (!tcp_validate_incoming(sk, skb, th, 0))
+ if (!tcp_validate_incoming(sk, skb, th, 0))
return 0;
/* step 5: check the ACK field */
switch (sk->sk_state) {
case TCP_SYN_RECV:
if (acceptable) {
- /* Once we leave TCP_SYN_RECV, we no longer
- * need req so release it.
- */
- if (req) {
- reqsk_fastopen_remove(sk, req, false);
- } else {
- /* Make sure socket is routed, for
- * correct metrics.
- */
- icsk->icsk_af_ops->rebuild_header(sk);
- tcp_init_congestion_control(sk);
-
- tcp_mtup_init(sk);
- tcp_init_buffer_space(sk);
- tp->copied_seq = tp->rcv_nxt;
- }
+ tp->copied_seq = tp->rcv_nxt;
smp_mb();
tcp_set_state(sk, TCP_ESTABLISHED);
sk->sk_state_change(sk);
if (tp->rx_opt.tstamp_ok)
tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
- if (req) {
- /* Re-arm the timer because data may
- * have been sent out. This is similar
- * to the regular data transmission case
- * when new data has just been ack'ed.
- *
- * (TFO) - we could try to be more
- * aggressive and retranmitting any data
- * sooner based on when they were sent
- * out.
- */
- tcp_rearm_rto(sk);
- } else
- tcp_init_metrics(sk);
+ /* Make sure socket is routed, for
+ * correct metrics.
+ */
+ icsk->icsk_af_ops->rebuild_header(sk);
+
+ tcp_init_metrics(sk);
+
+ tcp_init_congestion_control(sk);
/* Prevent spurious tcp_cwnd_restart() on
* first data packet.
*/
tp->lsndtime = tcp_time_stamp;
+ tcp_mtup_init(sk);
tcp_initialize_rcv_mss(sk);
+ tcp_init_buffer_space(sk);
tcp_fast_path_on(tp);
} else {
return 1;
break;
case TCP_FIN_WAIT1:
- /* If we enter the TCP_FIN_WAIT1 state and we are a
- * Fast Open socket and this is the first acceptable
- * ACK we have received, this would have acknowledged
- * our SYNACK so stop the SYNACK timer.
- */
- if (acceptable && req != NULL) {
- /* We no longer need the request sock. */
- reqsk_fastopen_remove(sk, req, false);
- tcp_rearm_rto(sk);
- }
if (tp->snd_una == tp->write_seq) {
struct dst_entry *dst;
const int code = icmp_hdr(icmp_skb)->code;
struct sock *sk;
struct sk_buff *skb;
- struct request_sock *req;
__u32 seq;
__u32 remaining;
int err;
icsk = inet_csk(sk);
tp = tcp_sk(sk);
- req = tp->fastopen_rsk;
seq = ntohl(th->seq);
if (sk->sk_state != TCP_LISTEN &&
- !between(seq, tp->snd_una, tp->snd_nxt) &&
- (req == NULL || seq != tcp_rsk(req)->snt_isn)) {
- /* For a Fast Open socket, allow seq to be snt_isn. */
+ !between(seq, tp->snd_una, tp->snd_nxt)) {
NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
!icsk->icsk_backoff)
break;
- /* XXX (TFO) - revisit the following logic for TFO */
-
if (sock_owned_by_user(sk))
break;
goto out;
}
- /* XXX (TFO) - if it's a TFO socket and has been accepted, rather
- * than following the TCP_SYN_RECV case and closing the socket,
- * we ignore the ICMP error and keep trying like a fully established
- * socket. Is this the right thing to do?
- */
- if (req && req->sk == NULL)
- goto out;
-
switch (sk->sk_state) {
struct request_sock *req, **prev;
case TCP_LISTEN:
case TCP_SYN_SENT:
case TCP_SYN_RECV: /* Cannot happen.
- It can f.e. if SYNs crossed,
- or Fast Open.
+ It can f.e. if SYNs crossed.
*/
if (!sock_owned_by_user(sk)) {
sk->sk_err = err;
static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
struct request_sock *req)
{
- /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
- * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
- */
- tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
- tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
- tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
+ tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
+ tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
req->ts_recent,
0,
tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
return -1;
- skb = tcp_make_synack(sk, dst, req, rvp, NULL);
+ skb = tcp_make_synack(sk, dst, req, rvp);
if (skb) {
__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
};
#endif
-static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
- struct request_sock *req,
- struct tcp_fastopen_cookie *foc,
- struct tcp_fastopen_cookie *valid_foc)
-{
- bool skip_cookie = false;
- struct fastopen_queue *fastopenq;
-
- if (likely(!fastopen_cookie_present(foc))) {
- /* See include/net/tcp.h for the meaning of these knobs */
- if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
- ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
- (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
- skip_cookie = true; /* no cookie to validate */
- else
- return false;
- }
- fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
- /* A FO option is present; bump the counter. */
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);
-
- /* Make sure the listener has enabled fastopen, and we don't
- * exceed the max # of pending TFO requests allowed before trying
- * to validating the cookie in order to avoid burning CPU cycles
- * unnecessarily.
- *
- * XXX (TFO) - The implication of checking the max_qlen before
- * processing a cookie request is that clients can't differentiate
- * between qlen overflow causing Fast Open to be disabled
- * temporarily vs a server not supporting Fast Open at all.
- */
- if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
- fastopenq == NULL || fastopenq->max_qlen == 0)
- return false;
-
- if (fastopenq->qlen >= fastopenq->max_qlen) {
- struct request_sock *req1;
- spin_lock(&fastopenq->lock);
- req1 = fastopenq->rskq_rst_head;
- if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
- spin_unlock(&fastopenq->lock);
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
- /* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/
- foc->len = -1;
- return false;
- }
- fastopenq->rskq_rst_head = req1->dl_next;
- fastopenq->qlen--;
- spin_unlock(&fastopenq->lock);
- reqsk_free(req1);
- }
- if (skip_cookie) {
- tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
- return true;
- }
- if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
- if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
- tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
- if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
- memcmp(&foc->val[0], &valid_foc->val[0],
- TCP_FASTOPEN_COOKIE_SIZE) != 0)
- return false;
- valid_foc->len = -1;
- }
- /* Acknowledge the data received from the peer. */
- tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
- return true;
- } else if (foc->len == 0) { /* Client requesting a cookie */
- tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPFASTOPENCOOKIEREQD);
- } else {
- /* Client sent a cookie with wrong size. Treat it
- * the same as invalid and return a valid one.
- */
- tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
- }
- return false;
-}
-
-static int tcp_v4_conn_req_fastopen(struct sock *sk,
- struct sk_buff *skb,
- struct sk_buff *skb_synack,
- struct request_sock *req,
- struct request_values *rvp)
-{
- struct tcp_sock *tp = tcp_sk(sk);
- struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
- const struct inet_request_sock *ireq = inet_rsk(req);
- struct sock *child;
-
- req->retrans = 0;
- req->sk = NULL;
-
- child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
- if (child == NULL) {
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
- kfree_skb(skb_synack);
- return -1;
- }
- ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
- ireq->rmt_addr, ireq->opt);
- /* XXX (TFO) - is it ok to ignore error and continue? */
-
- spin_lock(&queue->fastopenq->lock);
- queue->fastopenq->qlen++;
- spin_unlock(&queue->fastopenq->lock);
-
- /* Initialize the child socket. Have to fix some values to take
- * into account the child is a Fast Open socket and is created
- * only out of the bits carried in the SYN packet.
- */
- tp = tcp_sk(child);
-
- tp->fastopen_rsk = req;
- /* Do a hold on the listner sk so that if the listener is being
- * closed, the child that has been accepted can live on and still
- * access listen_lock.
- */
- sock_hold(sk);
- tcp_rsk(req)->listener = sk;
-
- /* RFC1323: The window in SYN & SYN/ACK segments is never
- * scaled. So correct it appropriately.
- */
- tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
-
- /* Activate the retrans timer so that SYNACK can be retransmitted.
- * The request socket is not added to the SYN table of the parent
- * because it's been added to the accept queue directly.
- */
- inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
- TCP_TIMEOUT_INIT, TCP_RTO_MAX);
-
- /* Add the child socket directly into the accept queue */
- inet_csk_reqsk_queue_add(sk, req, child);
-
- /* Now finish processing the fastopen child socket. */
- inet_csk(child)->icsk_af_ops->rebuild_header(child);
- tcp_init_congestion_control(child);
- tcp_mtup_init(child);
- tcp_init_buffer_space(child);
- tcp_init_metrics(child);
-
- /* Queue the data carried in the SYN packet. We need to first
- * bump skb's refcnt because the caller will attempt to free it.
- *
- * XXX (TFO) - we honor a zero-payload TFO request for now.
- * (Any reason not to?)
- */
- if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
- /* Don't queue the skb if there is no payload in SYN.
- * XXX (TFO) - How about SYN+FIN?
- */
- tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
- } else {
- skb = skb_get(skb);
- skb_dst_drop(skb);
- __skb_pull(skb, tcp_hdr(skb)->doff * 4);
- skb_set_owner_r(skb, child);
- __skb_queue_tail(&child->sk_receive_queue, skb);
- tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
- }
- sk->sk_data_ready(sk, 0);
- bh_unlock_sock(child);
- sock_put(child);
- WARN_ON(req->sk == NULL);
- return 0;
-}
-
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
struct tcp_extend_values tmp_ext;
__be32 daddr = ip_hdr(skb)->daddr;
__u32 isn = TCP_SKB_CB(skb)->when;
bool want_cookie = false;
- struct flowi4 fl4;
- struct tcp_fastopen_cookie foc = { .len = -1 };
- struct tcp_fastopen_cookie valid_foc = { .len = -1 };
- struct sk_buff *skb_synack;
- int do_fastopen;
/* Never answer to SYNs send to broadcast or multicast */
if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
tmp_opt.user_mss = tp->rx_opt.user_mss;
- tcp_parse_options(skb, &tmp_opt, &hash_location, 0,
- want_cookie ? NULL : &foc);
+ tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
if (tmp_opt.cookie_plus > 0 &&
tmp_opt.saw_tstamp &&
isn = cookie_v4_init_sequence(sk, skb, &req->mss);
req->cookie_ts = tmp_opt.tstamp_ok;
} else if (!isn) {
+ struct flowi4 fl4;
+
/* VJ's idea. We save last timestamp seen
* from the destination in peer table, when entering
* state TIME-WAIT, and check against it before
tcp_rsk(req)->snt_isn = isn;
tcp_rsk(req)->snt_synack = tcp_time_stamp;
- if (dst == NULL) {
- dst = inet_csk_route_req(sk, &fl4, req);
- if (dst == NULL)
- goto drop_and_free;
- }
- do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);
-
- /* We don't call tcp_v4_send_synack() directly because we need
- * to make sure a child socket can be created successfully before
- * sending back synack!
- *
- * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack()
- * (or better yet, call tcp_send_synack() in the child context
- * directly, but will have to fix bunch of other code first)
- * after syn_recv_sock() except one will need to first fix the
- * latter to remove its dependency on the current implementation
- * of tcp_v4_send_synack()->tcp_select_initial_window().
- */
- skb_synack = tcp_make_synack(sk, dst, req,
- (struct request_values *)&tmp_ext,
- fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
-
- if (skb_synack) {
- __tcp_v4_send_check(skb_synack, ireq->loc_addr, ireq->rmt_addr);
- skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
- } else
- goto drop_and_free;
-
- if (likely(!do_fastopen)) {
- int err;
- err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
- ireq->rmt_addr, ireq->opt);
- err = net_xmit_eval(err);
- if (err || want_cookie)
- goto drop_and_free;
-
- tcp_rsk(req)->listener = NULL;
- /* Add the request_sock to the SYN table */
- inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
- if (fastopen_cookie_present(&foc) && foc.len != 0)
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
- } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req,
- (struct request_values *)&tmp_ext))
+ if (tcp_v4_send_synack(sk, dst, req,
+ (struct request_values *)&tmp_ext,
+ skb_get_queue_mapping(skb),
+ want_cookie) ||
+ want_cookie)
goto drop_and_free;
+ inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
return 0;
drop_and_release:
struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
iph->saddr, iph->daddr);
if (req)
- return tcp_check_req(sk, skb, req, prev, false);
+ return tcp_check_req(sk, skb, req, prev);
nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
th->source, iph->daddr, th->dest, inet_iif(skb));
tcp_cookie_values_release);
tp->cookie_values = NULL;
}
- BUG_ON(tp->fastopen_rsk != NULL);
/* If socket is aborted during connect operation */
tcp_free_fastopen_req(tp);
const struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct inet_sock *inet = inet_sk(sk);
- struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
__be32 dest = inet->inet_daddr;
__be32 src = inet->inet_rcv_saddr;
__u16 destp = ntohs(inet->inet_dport);
jiffies_to_clock_t(icsk->icsk_ack.ato),
(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
tp->snd_cwnd,
- sk->sk_state == TCP_LISTEN ?
- (fastopenq ? fastopenq->max_qlen : 0) :
- (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh),
+ tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
len);
}
newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
newtp->rx_opt.mss_clamp = req->mss;
TCP_ECN_openreq_child(newtp, req);
- newtp->fastopen_rsk = NULL;
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS);
}
EXPORT_SYMBOL(tcp_create_openreq_child);
/*
- * Process an incoming packet for SYN_RECV sockets represented as a
- * request_sock. Normally sk is the listener socket but for TFO it
- * points to the child socket.
- *
- * XXX (TFO) - The current impl contains a special check for ack
- * validation and inside tcp_v4_reqsk_send_ack(). Can we do better?
+ * Process an incoming packet for SYN_RECV sockets represented
+ * as a request_sock.
*/
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
- struct request_sock **prev,
- bool fastopen)
+ struct request_sock **prev)
{
struct tcp_options_received tmp_opt;
const u8 *hash_location;
__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
bool paws_reject = false;
- BUG_ON(fastopen == (sk->sk_state == TCP_LISTEN));
-
tmp_opt.saw_tstamp = 0;
if (th->doff > (sizeof(struct tcphdr)>>2)) {
tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
*
* Enforce "SYN-ACK" according to figure 8, figure 6
* of RFC793, fixed by RFC1122.
- *
- * Note that even if there is new data in the SYN packet
- * they will be thrown away too.
*/
req->rsk_ops->rtx_syn_ack(sk, req, NULL);
return NULL;
* sent (the segment carries an unacceptable ACK) ...
* a reset is sent."
*
- * Invalid ACK: reset will be sent by listening socket.
- * Note that the ACK validity check for a Fast Open socket is done
- * elsewhere and is checked directly against the child socket rather
- * than req because user data may have been sent out.
+ * Invalid ACK: reset will be sent by listening socket
*/
- if ((flg & TCP_FLAG_ACK) && !fastopen &&
+ if ((flg & TCP_FLAG_ACK) &&
(TCP_SKB_CB(skb)->ack_seq !=
tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk))))
return sk;
/* RFC793: "first check sequence number". */
if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
- tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rcv_wnd)) {
+ tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) {
/* Out of window: send ACK and drop. */
if (!(flg & TCP_FLAG_RST))
req->rsk_ops->send_ack(sk, skb, req);
/* In sequence, PAWS is OK. */
- if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt))
+ if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1))
req->ts_recent = tmp_opt.rcv_tsval;
if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
/* ACK sequence verified above, just make sure ACK is
* set. If ACK not set, just silently drop the packet.
- *
- * XXX (TFO) - if we ever allow "data after SYN", the
- * following check needs to be removed.
*/
if (!(flg & TCP_FLAG_ACK))
return NULL;
- /* For Fast Open no more processing is needed (sk is the
- * child socket).
- */
- if (fastopen)
- return sk;
-
/* While TCP_DEFER_ACCEPT is active, drop bare ACK. */
if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
}
embryonic_reset:
- if (!(flg & TCP_FLAG_RST)) {
- /* Received a bad SYN pkt - for TFO We try not to reset
- * the local connection unless it's really necessary to
- * avoid becoming vulnerable to outside attack aiming at
- * resetting legit local connections.
- */
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
+ if (!(flg & TCP_FLAG_RST))
req->rsk_ops->send_reset(sk, skb);
- } else if (fastopen) { /* received a valid RST pkt */
- reqsk_fastopen_remove(sk, req, true);
- tcp_reset(sk);
- }
- if (!fastopen) {
- inet_csk_reqsk_queue_drop(sk, req, prev);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
- }
+
+ inet_csk_reqsk_queue_drop(sk, req, prev);
return NULL;
}
EXPORT_SYMBOL(tcp_check_req);
* Queue segment on the new socket if the new socket is active,
* otherwise we just shortcircuit this and continue with
* the new socket.
- *
- * For the vast majority of cases child->sk_state will be TCP_SYN_RECV
- * when entering. But other states are possible due to a race condition
- * where after __inet_lookup_established() fails but before the listener
- * locked is obtained, other packets cause the same connection to
- * be created.
*/
int tcp_child_process(struct sock *parent, struct sock *child,
unsigned int mss, struct sk_buff *skb,
struct tcp_out_options *opts,
struct tcp_md5sig_key **md5,
- struct tcp_extend_values *xvp,
- struct tcp_fastopen_cookie *foc)
+ struct tcp_extend_values *xvp)
{
struct inet_request_sock *ireq = inet_rsk(req);
unsigned int remaining = MAX_TCP_OPTION_SPACE;
if (unlikely(!ireq->tstamp_ok))
remaining -= TCPOLEN_SACKPERM_ALIGNED;
}
- if (foc != NULL) {
- u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
- need = (need + 3) & ~3U; /* Align to 32 bits */
- if (remaining >= need) {
- opts->options |= OPTION_FAST_OPEN_COOKIE;
- opts->fastopen_cookie = foc;
- remaining -= need;
- }
- }
+
/* Similar rationale to tcp_syn_options() applies here, too.
* If the <SYN> options fit, the same options should fit now!
*/
*/
struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
struct request_sock *req,
- struct request_values *rvp,
- struct tcp_fastopen_cookie *foc)
+ struct request_values *rvp)
{
struct tcp_out_options opts;
struct tcp_extend_values *xvp = tcp_xv(rvp);
#endif
TCP_SKB_CB(skb)->when = tcp_time_stamp;
tcp_header_size = tcp_synack_options(sk, req, mss,
- skb, &opts, &md5, xvp, foc)
+ skb, &opts, &md5, xvp)
+ sizeof(*th);
skb_push(skb, tcp_header_size);
}
th->seq = htonl(TCP_SKB_CB(skb)->seq);
- /* XXX data is queued and acked as is. No buffer/window check */
- th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
+ th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
th->window = htons(min(req->rcv_wnd, 65535U));
}
}
-/*
- * Timer for Fast Open socket to retransmit SYNACK. Note that the
- * sk here is the child socket, not the parent (listener) socket.
- */
-static void tcp_fastopen_synack_timer(struct sock *sk)
-{
- struct inet_connection_sock *icsk = inet_csk(sk);
- int max_retries = icsk->icsk_syn_retries ? :
- sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
- struct request_sock *req;
-
- req = tcp_sk(sk)->fastopen_rsk;
- req->rsk_ops->syn_ack_timeout(sk, req);
-
- if (req->retrans >= max_retries) {
- tcp_write_err(sk);
- return;
- }
- /* XXX (TFO) - Unlike regular SYN-ACK retransmit, we ignore error
- * returned from rtx_syn_ack() to make it more persistent like
- * regular retransmit because if the child socket has been accepted
- * it's not good to give up too easily.
- */
- req->rsk_ops->rtx_syn_ack(sk, req, NULL);
- req->retrans++;
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
- TCP_TIMEOUT_INIT << req->retrans, TCP_RTO_MAX);
-}
-
/*
* The TCP retransmit timer.
*/
tcp_resume_early_retransmit(sk);
return;
}
- if (tp->fastopen_rsk) {
- BUG_ON(sk->sk_state != TCP_SYN_RECV &&
- sk->sk_state != TCP_FIN_WAIT1);
- tcp_fastopen_synack_timer(sk);
- /* Before we receive ACK to our SYN-ACK don't retransmit
- * anything else (e.g., data or FIN segments).
- */
- return;
- }
+
if (!tp->packets_out)
goto out;
ireq = inet_rsk(req);
ireq6 = inet6_rsk(req);
treq = tcp_rsk(req);
- treq->listener = NULL;
if (security_inet_conn_request(sk, skb, req))
goto out_free;
if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
goto done;
- skb = tcp_make_synack(sk, dst, req, rvp, NULL);
+ skb = tcp_make_synack(sk, dst, req, rvp);
if (skb) {
__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
&ipv6_hdr(skb)->saddr,
&ipv6_hdr(skb)->daddr, inet6_iif(skb));
if (req)
- return tcp_check_req(sk, skb, req, prev, false);
+ return tcp_check_req(sk, skb, req, prev);
nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
&ipv6_hdr(skb)->saddr, th->source,
want_cookie)
goto drop_and_free;
- tcp_rsk(req)->listener = NULL;
inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
return 0;