static void set_emss(struct c4iw_ep *ep, u16 opt)
{
- ep->emss = ep->com.dev->rdev.lldi.mtus[GET_TCPOPT_MSS(opt)] - 40;
+ ep->emss = ep->com.dev->rdev.lldi.mtus[GET_TCPOPT_MSS(opt)] -
+ sizeof(struct iphdr) - sizeof(struct tcphdr);
ep->mss = ep->emss;
if (GET_TCPOPT_TSTAMP(opt))
ep->emss -= 12;
if (ep->emss < 128)
ep->emss = 128;
+ if (ep->emss & 7)
+ PDBG("Warning: misaligned mtu idx %u mss %u emss=%u\n",
+ GET_TCPOPT_MSS(opt), ep->mss, ep->emss);
PDBG("%s mss_idx %u mss %u emss=%u\n", __func__, GET_TCPOPT_MSS(opt),
ep->mss, ep->emss);
}
return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
+static void best_mtu(const unsigned short *mtus, unsigned short mtu,
+ unsigned int *idx, int use_ts)
+{
+ unsigned short hdr_size = sizeof(struct iphdr) +
+ sizeof(struct tcphdr) +
+ (use_ts ? 12 : 0);
+ unsigned short data_size = mtu - hdr_size;
+
+ cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx);
+}
+
static int send_connect(struct c4iw_ep *ep)
{
struct cpl_act_open_req *req;
}
set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
- cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx);
+ best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+ enable_tcp_timestamps);
wscale = compute_wscale(rcv_win);
opt0 = (nocong ? NO_CONG(1) : 0) |
KEEP_ALIVE(1) |
req6->opt2 = cpu_to_be32(opt2);
}
} else {
+ u32 isn = (prandom_u32() & ~7UL) - 1;
+
+ opt2 |= T5_OPT_2_VALID;
+ opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */
+ if (peer2peer)
+ isn += 4;
+
if (ep->com.remote_addr.ss_family == AF_INET) {
t5_req = (struct cpl_t5_act_open_req *)
skb_put(skb, wrlen);
cxgb4_select_ntuple(
ep->com.dev->rdev.lldi.ports[0],
ep->l2t)));
+ t5_req->rsvd = cpu_to_be32(isn);
+ PDBG("%s snd_isn %u\n", __func__,
+ be32_to_cpu(t5_req->rsvd));
t5_req->opt2 = cpu_to_be32(opt2);
} else {
t5_req6 = (struct cpl_t5_act_open_req6 *)
cxgb4_select_ntuple(
ep->com.dev->rdev.lldi.ports[0],
ep->l2t));
+ t5_req6->rsvd = cpu_to_be32(isn);
+ PDBG("%s snd_isn %u\n", __func__,
+ be32_to_cpu(t5_req6->rsvd));
t5_req6->opt2 = cpu_to_be32(opt2);
}
}
htons(F_FW_OFLD_CONNECTION_WR_CPLRXDATAACK);
req->tcb.tx_max = (__force __be32) jiffies;
req->tcb.rcv_adv = htons(1);
- cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx);
+ best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+ enable_tcp_timestamps);
wscale = compute_wscale(rcv_win);
req->tcb.opt0 = (__force __be64) (TCAM_BYPASS(1) |
(nocong ? NO_CONG(1) : 0) |
u64 opt0;
u32 opt2;
int wscale;
+ struct cpl_t5_pass_accept_rpl *rpl5 = NULL;
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
BUG_ON(skb_cloned(skb));
- skb_trim(skb, sizeof(*rpl));
+
skb_get(skb);
- cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx);
+ rpl = cplhdr(skb);
+ if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+ skb_trim(skb, roundup(sizeof(*rpl5), 16));
+ rpl5 = (void *)rpl;
+ INIT_TP_WR(rpl5, ep->hwtid);
+ } else {
+ skb_trim(skb, sizeof(*rpl));
+ INIT_TP_WR(rpl, ep->hwtid);
+ }
+ OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
+ ep->hwtid));
+
+ best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+ enable_tcp_timestamps && req->tcpopt.tstamp);
wscale = compute_wscale(rcv_win);
opt0 = (nocong ? NO_CONG(1) : 0) |
KEEP_ALIVE(1) |
opt2 |= CCTRL_ECN(1);
}
if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+ u32 isn = (prandom_u32() & ~7UL) - 1;
opt2 |= T5_OPT_2_VALID;
opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE);
+ opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */
+ rpl5 = (void *)rpl;
+ memset(&rpl5->iss, 0, roundup(sizeof(*rpl5)-sizeof(*rpl), 16));
+ if (peer2peer)
+ isn += 4;
+ rpl5->iss = cpu_to_be32(isn);
+ PDBG("%s iss %u\n", __func__, be32_to_cpu(rpl5->iss));
}
- rpl = cplhdr(skb);
- INIT_TP_WR(rpl, ep->hwtid);
- OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
- ep->hwtid));
rpl->opt0 = cpu_to_be64(opt0);
rpl->opt2 = cpu_to_be32(opt2);
set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
int err;
u16 peer_mss = ntohs(req->tcpopt.mss);
int iptype;
+ unsigned short hdrs;
parent_ep = lookup_stid(t, stid);
if (!parent_ep) {
goto reject;
}
- if (peer_mss && child_ep->mtu > (peer_mss + 40))
- child_ep->mtu = peer_mss + 40;
+ hdrs = sizeof(struct iphdr) + sizeof(struct tcphdr) +
+ ((enable_tcp_timestamps && req->tcpopt.tstamp) ? 12 : 0);
+ if (peer_mss && child_ep->mtu > (peer_mss + hdrs))
+ child_ep->mtu = peer_mss + hdrs;
state_set(&child_ep->com, CONNECTING);
child_ep->com.dev = dev;
#define V_CONG_CNTRL(x) ((x) << S_CONG_CNTRL)
#define G_CONG_CNTRL(x) (((x) >> S_CONG_CNTRL) & M_CONG_CNTRL)
+#define CONG_CNTRL_VALID (1 << 18)
#define T5_OPT_2_VALID (1 << 31)
#endif /* _T4FW_RI_API_H_ */
}
EXPORT_SYMBOL(cxgb4_best_mtu);
+/**
+ * cxgb4_best_aligned_mtu - find best MTU, [hopefully] data size aligned
+ * @mtus: the HW MTU table
+ * @header_size: Header Size
+ * @data_size_max: maximum Data Segment Size
+ * @data_size_align: desired Data Segment Size Alignment (2^N)
+ * @mtu_idxp: HW MTU Table Index return value pointer (possibly NULL)
+ *
+ * Similar to cxgb4_best_mtu() but instead of searching the Hardware
+ * MTU Table based solely on a Maximum MTU parameter, we break that
+ * parameter up into a Header Size and Maximum Data Segment Size, and
+ * provide a desired Data Segment Size Alignment. If we find an MTU in
+ * the Hardware MTU Table which will result in a Data Segment Size with
+ * the requested alignment _and_ that MTU isn't "too far" from the
+ * closest MTU, then we'll return that rather than the closest MTU.
+ */
+unsigned int cxgb4_best_aligned_mtu(const unsigned short *mtus,
+ unsigned short header_size,
+ unsigned short data_size_max,
+ unsigned short data_size_align,
+ unsigned int *mtu_idxp)
+{
+ unsigned short max_mtu = header_size + data_size_max;
+ unsigned short data_size_align_mask = data_size_align - 1;
+ int mtu_idx, aligned_mtu_idx;
+
+ /* Scan the MTU Table till we find an MTU which is larger than our
+ * Maximum MTU or we reach the end of the table. Along the way,
+ * record the last MTU found, if any, which will result in a Data
+ * Segment Length matching the requested alignment.
+ */
+ for (mtu_idx = 0, aligned_mtu_idx = -1; mtu_idx < NMTUS; mtu_idx++) {
+ unsigned short data_size = mtus[mtu_idx] - header_size;
+
+ /* If this MTU minus the Header Size would result in a
+ * Data Segment Size of the desired alignment, remember it.
+ */
+ if ((data_size & data_size_align_mask) == 0)
+ aligned_mtu_idx = mtu_idx;
+
+ /* If we're not at the end of the Hardware MTU Table and the
+ * next element is larger than our Maximum MTU, drop out of
+ * the loop.
+ */
+ if (mtu_idx+1 < NMTUS && mtus[mtu_idx+1] > max_mtu)
+ break;
+ }
+
+ /* If we fell out of the loop because we ran to the end of the table,
+ * then we just have to use the last [largest] entry.
+ */
+ if (mtu_idx == NMTUS)
+ mtu_idx--;
+
+ /* If we found an MTU which resulted in the requested Data Segment
+ * Length alignment and that's "not far" from the largest MTU which is
+ * less than or equal to the maximum MTU, then use that.
+ */
+ if (aligned_mtu_idx >= 0 &&
+ mtu_idx - aligned_mtu_idx <= 1)
+ mtu_idx = aligned_mtu_idx;
+
+ /* If the caller has passed in an MTU Index pointer, pass the
+ * MTU Index back. Return the MTU value.
+ */
+ if (mtu_idxp)
+ *mtu_idxp = mtu_idx;
+ return mtus[mtu_idx];
+}
+EXPORT_SYMBOL(cxgb4_best_aligned_mtu);
+
/**
* cxgb4_port_chan - get the HW channel of a port
* @dev: the net device for the port
#undef FW_PARAM_PFVF
#undef FW_PARAM_DEV
- /*
- * These are finalized by FW initialization, load their values now.
+ /* The MTU/MSS Table is initialized by now, so load their values. If
+ * we're initializing the adapter, then we'll make any modifications
+ * we want to the MTU/MSS Table and also initialize the congestion
+ * parameters.
*/
t4_read_mtu_tbl(adap, adap->params.mtus, NULL);
- t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd,
- adap->params.b_wnd);
+ if (state != DEV_STATE_INIT) {
+ int i;
+
+ /* The default MTU Table contains values 1492 and 1500.
+ * However, for TCP, it's better to have two values which are
+ * a multiple of 8 +/- 4 bytes apart near this popular MTU.
+ * This allows us to have a TCP Data Payload which is a
+ * multiple of 8 regardless of what combination of TCP Options
+ * are in use (always a multiple of 4 bytes) which is
+ * important for performance reasons. For instance, if no
+ * options are in use, then we have a 20-byte IP header and a
+ * 20-byte TCP header. In this case, a 1500-byte MSS would
+ * result in a TCP Data Payload of 1500 - 40 == 1460 bytes
+ * which is not a multiple of 8. So using an MSS of 1488 in
+ * this case results in a TCP Data Payload of 1448 bytes which
+ * is a multiple of 8. On the other hand, if 12-byte TCP Time
+ * Stamps have been negotiated, then an MTU of 1500 bytes
+ * results in a TCP Data Payload of 1448 bytes which, as
+ * above, is a multiple of 8 bytes ...
+ */
+ for (i = 0; i < NMTUS; i++)
+ if (adap->params.mtus[i] == 1492) {
+ adap->params.mtus[i] = 1488;
+ break;
+ }
+ t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd,
+ adap->params.b_wnd);
+ }
t4_init_tp_params(adap);
adap->flags |= FW_OK;
return 0;
unsigned int cxgb4_port_idx(const struct net_device *dev);
unsigned int cxgb4_best_mtu(const unsigned short *mtus, unsigned short mtu,
unsigned int *idx);
+unsigned int cxgb4_best_aligned_mtu(const unsigned short *mtus,
+ unsigned short header_size,
+ unsigned short data_size_max,
+ unsigned short data_size_align,
+ unsigned int *mtu_idxp);
void cxgb4_get_tcp_stats(struct pci_dev *pdev, struct tp_tcp_stats *v4,
struct tp_tcp_stats *v6);
void cxgb4_iscsi_init(struct net_device *dev, unsigned int tag_mask,
__be64 opt0;
};
+struct cpl_t5_pass_accept_rpl {
+ WR_HDR;
+ union opcode_tid ot;
+ __be32 opt2;
+ __be64 opt0;
+ __be32 iss;
+ __be32 rsvd;
+};
+
struct cpl_act_open_req {
WR_HDR;
union opcode_tid ot;