/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <net/flow_dissector.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_skbedit.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
#include <net/switchdev.h>
#include <net/tc_act/tc_mirred.h>
#include <net/tc_act/tc_vlan.h>
#include <net/tc_act/tc_tunnel_key.h>
#include <net/vxlan.h>
#include "en.h"
#include "en_tc.h"
#include "eswitch.h"
#include "vxlan.h"
enum {
	MLX5E_TC_FLOW_ESWITCH	= BIT(0),
};

/* Per-offloaded-filter state, keyed in the rhashtable by the TC cookie */
struct mlx5e_tc_flow {
	struct rhash_head	node;
	u64			cookie;
	u8			flags;
	struct mlx5_flow_handle *rule;
	struct list_head	encap; /* flows sharing the same encap */
	struct mlx5_esw_flow_attr *attr;
};

enum {
	MLX5_HEADER_TYPE_VXLAN = 0x0,
	MLX5_HEADER_TYPE_NVGRE = 0x1,
};

#define MLX5E_TC_TABLE_NUM_ENTRIES 1024
#define MLX5E_TC_TABLE_NUM_GROUPS 4
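/* Add a flow rule to the NIC RX flow table. The TC table is created
 * lazily on first use; a counted drop rule steers to the counter
 * destination, everything else forwards to the vlan table.
 */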
static struct mlx5_flow_handle *
mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
		      struct mlx5_flow_spec *spec,
		      u32 action, u32 flow_tag)
{
	struct mlx5_core_dev *dev = priv->mdev;
	struct mlx5_flow_destination dest = { 0 };
	struct mlx5_flow_act flow_act = {
		.action = action,
		.flow_tag = flow_tag,
		.encap_id = 0,
	};
	struct mlx5_fc *counter = NULL;
	struct mlx5_flow_handle *rule;
	bool table_created = false;

	if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
		dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
		dest.ft = priv->fs.vlan.ft.t;
	} else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
		counter = mlx5_fc_create(dev, true);
		if (IS_ERR(counter))
			return ERR_CAST(counter);

		dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
		dest.counter = counter;
	}

	if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
		priv->fs.tc.t =
			mlx5_create_auto_grouped_flow_table(priv->fs.ns,
							    MLX5E_TC_PRIO,
							    MLX5E_TC_TABLE_NUM_ENTRIES,
							    MLX5E_TC_TABLE_NUM_GROUPS,
							    0, 0);
		if (IS_ERR(priv->fs.tc.t)) {
			netdev_err(priv->netdev,
				   "Failed to create tc offload table\n");
			rule = ERR_CAST(priv->fs.tc.t);
			goto err_create_ft;
		}

		table_created = true;
	}

	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
	rule = mlx5_add_flow_rules(priv->fs.tc.t, spec, &flow_act, &dest, 1);
	if (IS_ERR(rule))
		goto err_add_rule;

	return rule;

err_add_rule:
	if (table_created) {
		mlx5_destroy_flow_table(priv->fs.tc.t);
		priv->fs.tc.t = NULL;
	}
err_create_ft:
	mlx5_fc_destroy(dev, counter);

	return rule;
}
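/* Offload into the eswitch FDB table: apply any vlan push/pop action
 * first, then install the offloaded rule for the in/out representors.
 */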
static struct mlx5_flow_handle *
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
		      struct mlx5_flow_spec *spec,
		      struct mlx5_esw_flow_attr *attr)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	int err;

	err = mlx5_eswitch_add_vlan_action(esw, attr);
	if (err)
		return ERR_PTR(err);

	return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
}
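/* Drop this flow's reference on the encap entry; the last flow to
 * detach also releases the hardware encap id and frees the entry.
 */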
static void mlx5e_detach_encap(struct mlx5e_priv *priv,
			       struct mlx5e_tc_flow *flow)
{
	struct list_head *next = flow->encap.next;

	list_del(&flow->encap);
	if (list_empty(next)) {
		struct mlx5_encap_entry *e;

		e = list_entry(next, struct mlx5_encap_entry, flows);
		if (e->n) {
			mlx5_encap_dealloc(priv->mdev, e->encap_id);
			neigh_release(e->n);
		}
		hlist_del_rcu(&e->encap_hlist);
		kfree(e);
	}
}
/* We also get here when setting the rule to the FW failed, etc. In that
 * case the flow rule itself might not exist, but some offloading related
 * to the actions should be cleaned.
 */
static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_fc *counter = NULL;

	if (!IS_ERR(flow->rule)) {
		counter = mlx5_flow_rule_counter(flow->rule);
		mlx5_del_flow_rules(flow->rule);
		mlx5_fc_destroy(priv->mdev, counter);
	}

	if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
		mlx5_eswitch_del_vlan_action(esw, flow->attr);
		if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
			mlx5e_detach_encap(priv, flow);
	}

	/* delete the TC flow table once the last offloaded filter is gone */
	if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) {
		mlx5_destroy_flow_table(priv->fs.tc.t);
		priv->fs.tc.t = NULL;
	}
}
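/* Build the VXLAN part of the match: the outer IP protocol must be UDP
 * and, when the filter carries a tunnel key id, match it against the
 * VNI in the misc parameters.
 */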
static void parse_vxlan_attr(struct mlx5_flow_spec *spec,
			     struct tc_cls_flower_offload *f)
{
	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				       outer_headers);
	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				       outer_headers);
	void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				    misc_parameters);
	void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				    misc_parameters);

	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);

	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
		struct flow_dissector_key_keyid *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_KEYID,
						  f->key);
		struct flow_dissector_key_keyid *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_KEYID,
						  f->mask);
		MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
			 be32_to_cpu(mask->keyid));
		MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
			 be32_to_cpu(key->keyid));
	}
}
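/* Translate the tunnel (decap) part of a flower filter into outer
 * header matches. Only VXLAN over UDP with a fully masked destination
 * port is offloaded; anything else is left to software.
 */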
static int parse_tunnel_attr(struct mlx5e_priv *priv,
			     struct mlx5_flow_spec *spec,
			     struct tc_cls_flower_offload *f)
{
	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				       outer_headers);
	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				       outer_headers);

	struct flow_dissector_key_control *enc_control =
		skb_flow_dissector_target(f->dissector,
					  FLOW_DISSECTOR_KEY_ENC_CONTROL,
					  f->key);

	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
		struct flow_dissector_key_ports *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_PORTS,
						  f->key);
		struct flow_dissector_key_ports *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_PORTS,
						  f->mask);

		/* Full udp dst port must be given */
		if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
			goto vxlan_match_offload_err;

		if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->dst)) &&
		    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
			parse_vxlan_attr(spec, f);
		} else {
			netdev_warn(priv->netdev,
				    "%d isn't an offloaded vxlan udp dport\n",
				    be16_to_cpu(key->dst));
			return -EOPNOTSUPP;
		}

		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
			 udp_dport, ntohs(mask->dst));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
			 udp_dport, ntohs(key->dst));

		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
			 udp_sport, ntohs(mask->src));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
			 udp_sport, ntohs(key->src));
	} else { /* udp dst port must be given */
vxlan_match_offload_err:
		netdev_warn(priv->netdev,
			    "IP tunnel decap offload supported only for vxlan, must set UDP dport\n");
		return -EOPNOTSUPP;
	}

	if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		struct flow_dissector_key_ipv4_addrs *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
						  f->key);
		struct flow_dissector_key_ipv4_addrs *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
						  f->mask);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
			 src_ipv4_src_ipv6.ipv4_layout.ipv4,
			 ntohl(mask->src));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
			 src_ipv4_src_ipv6.ipv4_layout.ipv4,
			 ntohl(key->src));

		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
			 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
			 ntohl(mask->dst));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
			 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
			 ntohl(key->dst));

		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP);
	} else if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct flow_dissector_key_ipv6_addrs *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
						  f->key);
		struct flow_dissector_key_ipv6_addrs *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
						  f->mask);

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));

		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6);
	}

	/* Enforce DMAC when offloading incoming tunneled flows.
	 * Flow counters require a match on the DMAC.
	 */
	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16);
	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0);
	ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				     dmac_47_16), priv->netdev->dev_addr);

	/* let software handle IP fragments */
	MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
	MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);

	return 0;
}
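/* Translate the flower match (dissector keys) into an mlx5 match spec
 * and report the minimal vport inline mode the match requires.
 */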
static int __parse_cls_flower(struct mlx5e_priv *priv,
			      struct mlx5_flow_spec *spec,
			      struct tc_cls_flower_offload *f,
			      u8 *min_inline)
{
	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				       outer_headers);
	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				       outer_headers);
	u16 addr_type = 0;
	u8 ip_proto = 0;

	*min_inline = MLX5_INLINE_MODE_L2;

	if (f->dissector->used_keys &
	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_VLAN) |
	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_PORTS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL))) {
		netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
			    f->dissector->used_keys);
		return -EOPNOTSUPP;
	}
	if ((dissector_uses_key(f->dissector,
				FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) ||
	     dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID) ||
	     dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) &&
	    dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
		struct flow_dissector_key_control *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_CONTROL,
						  f->key);
		switch (key->addr_type) {
		case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
		case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
			if (parse_tunnel_attr(priv, spec, f))
				return -EOPNOTSUPP;
			break;
		default:
			return -EOPNOTSUPP;
		}

		/* In decap flow, header pointers should point to the inner
		 * headers, outer headers were already set by parse_tunnel_attr
		 */
		headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
					 inner_headers);
		headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
					 inner_headers);
	}
	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
		struct flow_dissector_key_control *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_CONTROL,
						  f->key);

		struct flow_dissector_key_control *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_CONTROL,
						  f->mask);
		addr_type = key->addr_type;

		if (mask->flags & FLOW_DIS_IS_FRAGMENT) {
			MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
				 key->flags & FLOW_DIS_IS_FRAGMENT);

			/* the HW doesn't need L3 inline to match on frag=no */
			if (key->flags & FLOW_DIS_IS_FRAGMENT)
				*min_inline = MLX5_INLINE_MODE_IP;
		}
	}
	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
		struct flow_dissector_key_basic *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_BASIC,
						  f->key);
		struct flow_dissector_key_basic *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_BASIC,
						  f->mask);
		ip_proto = key->ip_proto;

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
			 ntohs(mask->n_proto));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
			 ntohs(key->n_proto));

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
			 mask->ip_proto);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
			 key->ip_proto);

		if (mask->ip_proto)
			*min_inline = MLX5_INLINE_MODE_IP;
	}
	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
		struct flow_dissector_key_eth_addrs *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
						  f->key);
		struct flow_dissector_key_eth_addrs *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
						  f->mask);

		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
					     dmac_47_16),
				mask->dst);
		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
					     dmac_47_16),
				key->dst);

		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
					     smac_47_16),
				mask->src);
		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
					     smac_47_16),
				key->src);
	}
	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
		struct flow_dissector_key_vlan *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_VLAN,
						  f->key);
		struct flow_dissector_key_vlan *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_VLAN,
						  f->mask);
		if (mask->vlan_id || mask->vlan_priority) {
			MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);

			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id);

			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, mask->vlan_priority);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, key->vlan_priority);
		}
	}
	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		struct flow_dissector_key_ipv4_addrs *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
						  f->key);
		struct flow_dissector_key_ipv4_addrs *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
						  f->mask);

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &mask->src, sizeof(mask->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &key->src, sizeof(key->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &mask->dst, sizeof(mask->dst));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &key->dst, sizeof(key->dst));

		if (mask->src || mask->dst)
			*min_inline = MLX5_INLINE_MODE_IP;
	}
	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct flow_dissector_key_ipv6_addrs *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
						  f->key);
		struct flow_dissector_key_ipv6_addrs *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
						  f->mask);

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &mask->src, sizeof(mask->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &key->src, sizeof(key->src));

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &mask->dst, sizeof(mask->dst));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &key->dst, sizeof(key->dst));

		if (ipv6_addr_type(&mask->src) != IPV6_ADDR_ANY ||
		    ipv6_addr_type(&mask->dst) != IPV6_ADDR_ANY)
			*min_inline = MLX5_INLINE_MODE_IP;
	}
	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
		struct flow_dissector_key_ports *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_PORTS,
						  f->key);
		struct flow_dissector_key_ports *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_PORTS,
						  f->mask);
		switch (ip_proto) {
		case IPPROTO_TCP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_sport, ntohs(mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_sport, ntohs(key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_dport, ntohs(mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_dport, ntohs(key->dst));
			break;

		case IPPROTO_UDP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_sport, ntohs(mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_sport, ntohs(key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_dport, ntohs(mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_dport, ntohs(key->dst));
			break;
		default:
			netdev_err(priv->netdev,
				   "Only UDP and TCP transports are supported\n");
			return -EINVAL;
		}

		if (mask->src || mask->dst)
			*min_inline = MLX5_INLINE_MODE_TCP_UDP;
	}

	return 0;
}
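/* Wrapper around __parse_cls_flower: for eswitch flows on a VF rep,
 * also verify that the required min inline mode is allowed by the
 * current eswitch inline configuration.
 */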
static int parse_cls_flower(struct mlx5e_priv *priv,
			    struct mlx5e_tc_flow *flow,
			    struct mlx5_flow_spec *spec,
			    struct tc_cls_flower_offload *f)
{
	struct mlx5_core_dev *dev = priv->mdev;
	struct mlx5_eswitch *esw = dev->priv.eswitch;
	struct mlx5_eswitch_rep *rep = priv->ppriv;
	u8 min_inline;
	int err;

	err = __parse_cls_flower(priv, spec, f, &min_inline);

	if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH) &&
	    rep->vport != FDB_UPLINK_VPORT) {
		if (min_inline > esw->offloads.inline_mode) {
			netdev_warn(priv->netdev,
				    "Flow is not offloaded due to min inline setting, required %d actual %d\n",
				    min_inline, esw->offloads.inline_mode);
			return -EOPNOTSUPP;
		}
	}

	return err;
}
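/* Parse actions for a NIC (non-eswitch) flow: gact drop (counted when
 * the device supports flow counters) and skbedit mark are the only
 * supported actions, a single action per rule.
 */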
static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
				u32 *action, u32 *flow_tag)
{
	const struct tc_action *a;
	LIST_HEAD(actions);

	if (tc_no_actions(exts))
		return -EINVAL;

	*flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
	*action = 0;

	tcf_exts_to_list(exts, &actions);
	list_for_each_entry(a, &actions, list) {
		/* Only support a single action per rule */
		if (*action)
			return -EINVAL;

		if (is_tcf_gact_shot(a)) {
			*action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
			if (MLX5_CAP_FLOWTABLE(priv->mdev,
					       flow_table_properties_nic_receive.flow_counter))
				*action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
			continue;
		}

		if (is_tcf_skbedit_mark(a)) {
			u32 mark = tcf_skbedit_mark(a);

			if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
				netdev_warn(priv->netdev, "Bad flow mark - only 16 bits are supported: 0x%x\n",
					    mark);
				return -EINVAL;
			}

			*flow_tag = mark;
			*action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
			continue;
		}

		return -EINVAL;
	}

	return 0;
}
static inline int cmp_encap_info(struct ip_tunnel_key *a,
				 struct ip_tunnel_key *b)
{
	return memcmp(a, b, sizeof(*a));
}

static inline int hash_encap_info(struct ip_tunnel_key *key)
{
	return jhash(key, sizeof(*key), 0);
}
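/* Resolve the route and neighbour for the tunnel destination. If the
 * egress device is not on our HW e-switch, the encapsulated packet is
 * sent via the uplink representor instead.
 */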
static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
				   struct net_device *mirred_dev,
				   struct net_device **out_dev,
				   struct flowi4 *fl4,
				   struct neighbour **out_n,
				   int *out_ttl)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct rtable *rt;
	struct neighbour *n = NULL;

#if IS_ENABLED(CONFIG_INET)
	int ret;

	rt = ip_route_output_key(dev_net(mirred_dev), fl4);
	ret = PTR_ERR_OR_ZERO(rt);
	if (ret)
		return ret;
#else
	return -EOPNOTSUPP;
#endif
	/* if the egress device isn't on the same HW e-switch, we use the uplink */
	if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev))
		*out_dev = mlx5_eswitch_get_uplink_netdev(esw);
	else
		*out_dev = rt->dst.dev;

	*out_ttl = ip4_dst_hoplimit(&rt->dst);
	n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
	ip_rt_put(rt);
	if (!n)
		return -ENOMEM;

	*out_n = n;
	return 0;
}
static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
				   struct net_device *mirred_dev,
				   struct net_device **out_dev,
				   struct flowi6 *fl6,
				   struct neighbour **out_n,
				   int *out_ttl)
{
	struct neighbour *n = NULL;
	struct dst_entry *dst;

#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	int ret;

	dst = ip6_route_output(dev_net(mirred_dev), NULL, fl6);
	ret = dst->error;
	if (ret) {
		dst_release(dst);
		return ret;
	}

	*out_ttl = ip6_dst_hoplimit(dst);

	/* if the egress device isn't on the same HW e-switch, we use the uplink */
	if (!switchdev_port_same_parent_id(priv->netdev, dst->dev))
		*out_dev = mlx5_eswitch_get_uplink_netdev(esw);
	else
		*out_dev = dst->dev;
#else
	return -EOPNOTSUPP;
#endif

	n = dst_neigh_lookup(dst, &fl6->daddr);
	dst_release(dst);
	if (!n)
		return -ENOMEM;

	*out_n = n;
	return 0;
}
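/* Build a complete ETH/IP/UDP/VXLAN encap header in buf and return its
 * size; the remaining variable fields (e.g. IP length) are left for the
 * hardware to fill at transmission time.
 */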
static int gen_vxlan_header_ipv4(struct net_device *out_dev,
				 char buf[],
				 unsigned char h_dest[ETH_ALEN],
				 int ttl,
				 __be32 daddr,
				 __be32 saddr,
				 __be16 udp_dst_port,
				 __be32 vx_vni)
{
	int encap_size = VXLAN_HLEN + sizeof(struct iphdr) + ETH_HLEN;
	struct ethhdr *eth = (struct ethhdr *)buf;
	struct iphdr  *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr));
	struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr));
	struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));

	memset(buf, 0, encap_size);

	ether_addr_copy(eth->h_dest, h_dest);
	ether_addr_copy(eth->h_source, out_dev->dev_addr);
	eth->h_proto = htons(ETH_P_IP);

	ip->daddr = daddr;
	ip->saddr = saddr;

	ip->ttl = ttl;
	ip->protocol = IPPROTO_UDP;
	ip->version = 0x4;
	ip->ihl = 0x5;

	udp->dest = udp_dst_port;
	vxh->vx_flags = VXLAN_HF_VNI;
	vxh->vx_vni = vxlan_vni_field(vx_vni);

	return encap_size;
}
static int gen_vxlan_header_ipv6(struct net_device *out_dev,
				 char buf[],
				 unsigned char h_dest[ETH_ALEN],
				 int ttl,
				 struct in6_addr *daddr,
				 struct in6_addr *saddr,
				 __be16 udp_dst_port,
				 __be32 vx_vni)
{
	int encap_size = VXLAN_HLEN + sizeof(struct ipv6hdr) + ETH_HLEN;
	struct ethhdr *eth = (struct ethhdr *)buf;
	struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr));
	struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr));
	struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));

	memset(buf, 0, encap_size);

	ether_addr_copy(eth->h_dest, h_dest);
	ether_addr_copy(eth->h_source, out_dev->dev_addr);
	eth->h_proto = htons(ETH_P_IPV6);

	ip6_flow_hdr(ip6h, 0, 0);
	/* the HW fills up the IPv6 payload len */
	ip6h->nexthdr = IPPROTO_UDP;
	ip6h->hop_limit = ttl;
	ip6h->daddr = *daddr;
	ip6h->saddr = *saddr;

	udp->dest = udp_dst_port;
	vxh->vx_flags = VXLAN_HF_VNI;
	vxh->vx_vni = vxlan_vni_field(vx_vni);

	return encap_size;
}
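/* Resolve route/neighbour for the tunnel, snapshot the neighbour MAC,
 * generate the encap header and register it with the FW to obtain an
 * encap_id that flow rules can reference.
 */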
static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
					  struct net_device *mirred_dev,
					  struct mlx5_encap_entry *e,
					  struct net_device **out_dev)
{
	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
	struct ip_tunnel_key *tun_key = &e->tun_info.key;
	int encap_size, ttl, err;
	struct neighbour *n = NULL;
	struct flowi4 fl4 = {};
	char *encap_header;

	encap_header = kzalloc(max_encap_size, GFP_KERNEL);
	if (!encap_header)
		return -ENOMEM;

	switch (e->tunnel_type) {
	case MLX5_HEADER_TYPE_VXLAN:
		fl4.flowi4_proto = IPPROTO_UDP;
		fl4.fl4_dport = tun_key->tp_dst;
		break;
	default:
		err = -EOPNOTSUPP;
		goto out;
	}

	fl4.flowi4_tos = tun_key->tos;
	fl4.daddr = tun_key->u.ipv4.dst;
	fl4.saddr = tun_key->u.ipv4.src;

	err = mlx5e_route_lookup_ipv4(priv, mirred_dev, out_dev,
				      &fl4, &n, &ttl);
	if (err)
		goto out;

	if (!(n->nud_state & NUD_VALID)) {
		pr_warn("%s: can't offload, neighbour to %pI4 invalid\n", __func__, &fl4.daddr);
		err = -EOPNOTSUPP;
		goto out;
	}

	e->n = n;
	e->out_dev = *out_dev;

	neigh_ha_snapshot(e->h_dest, n, *out_dev);

	switch (e->tunnel_type) {
	case MLX5_HEADER_TYPE_VXLAN:
		encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header,
						   e->h_dest, ttl,
						   fl4.daddr,
						   fl4.saddr, tun_key->tp_dst,
						   tunnel_id_to_key32(tun_key->tun_id));
		break;
	default:
		err = -EOPNOTSUPP;
		goto out;
	}

	err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
			       encap_size, encap_header, &e->encap_id);
out:
	if (err && n)
		neigh_release(n);
	kfree(encap_header);
	return err;
}
static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
					  struct net_device *mirred_dev,
					  struct mlx5_encap_entry *e,
					  struct net_device **out_dev)
{
	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
	struct ip_tunnel_key *tun_key = &e->tun_info.key;
	int encap_size, err, ttl = 0;
	struct neighbour *n = NULL;
	struct flowi6 fl6 = {};
	char *encap_header;

	encap_header = kzalloc(max_encap_size, GFP_KERNEL);
	if (!encap_header)
		return -ENOMEM;

	switch (e->tunnel_type) {
	case MLX5_HEADER_TYPE_VXLAN:
		fl6.flowi6_proto = IPPROTO_UDP;
		fl6.fl6_dport = tun_key->tp_dst;
		break;
	default:
		err = -EOPNOTSUPP;
		goto out;
	}

	fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
	fl6.daddr = tun_key->u.ipv6.dst;
	fl6.saddr = tun_key->u.ipv6.src;

	err = mlx5e_route_lookup_ipv6(priv, mirred_dev, out_dev,
				      &fl6, &n, &ttl);
	if (err)
		goto out;

	if (!(n->nud_state & NUD_VALID)) {
		pr_warn("%s: can't offload, neighbour to %pI6 invalid\n", __func__, &fl6.daddr);
		err = -EOPNOTSUPP;
		goto out;
	}

	e->n = n;
	e->out_dev = *out_dev;

	neigh_ha_snapshot(e->h_dest, n, *out_dev);

	switch (e->tunnel_type) {
	case MLX5_HEADER_TYPE_VXLAN:
		encap_size = gen_vxlan_header_ipv6(*out_dev, encap_header,
						   e->h_dest, ttl,
						   &fl6.daddr,
						   &fl6.saddr, tun_key->tp_dst,
						   tunnel_id_to_key32(tun_key->tun_id));
		break;
	default:
		err = -EOPNOTSUPP;
		goto out;
	}

	err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
			       encap_size, encap_header, &e->encap_id);
out:
	if (err && n)
		neigh_release(n);
	kfree(encap_header);
	return err;
}
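/* Find or create the encap entry for this tunnel key. Entries live in a
 * hash table on the eswitch and are shared by all flows with the same
 * tunnel parameters.
 */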
static int mlx5e_attach_encap(struct mlx5e_priv *priv,
			      struct ip_tunnel_info *tun_info,
			      struct net_device *mirred_dev,
			      struct mlx5_esw_flow_attr *attr)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned short family = ip_tunnel_info_af(tun_info);
	struct ip_tunnel_key *key = &tun_info->key;
	struct mlx5_encap_entry *e;
	struct net_device *out_dev;
	int tunnel_type, err = -EOPNOTSUPP;
	uintptr_t hash_key;
	bool found = false;

	/* udp dst port must be set */
	if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst)))
		goto vxlan_encap_offload_err;

	/* setting udp src port isn't supported */
	if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) {
vxlan_encap_offload_err:
		netdev_warn(priv->netdev,
			    "must set udp dst port and not set udp src port\n");
		return -EOPNOTSUPP;
	}

	if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->tp_dst)) &&
	    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
		tunnel_type = MLX5_HEADER_TYPE_VXLAN;
	} else {
		netdev_warn(priv->netdev,
			    "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst));
		return -EOPNOTSUPP;
	}

	hash_key = hash_encap_info(key);

	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
				   encap_hlist, hash_key) {
		if (!cmp_encap_info(&e->tun_info.key, key)) {
			found = true;
			break;
		}
	}

	if (found) {
		attr->encap = e;
		return 0;
	}

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e)
		return -ENOMEM;

	e->tun_info = *tun_info;
	e->tunnel_type = tunnel_type;
	INIT_LIST_HEAD(&e->flows);

	if (family == AF_INET)
		err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e, &out_dev);
	else if (family == AF_INET6)
		err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e, &out_dev);

	if (err)
		goto out_err;

	attr->encap = e;
	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);

	return err;

out_err:
	kfree(e);
	return err;
}
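/* Parse actions for an eswitch (FDB) flow: drop, redirect to another
 * port of the same eswitch (possibly through an encap), vlan push/pop
 * and tunnel decap.
 */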
static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
				struct mlx5e_tc_flow *flow)
{
	struct mlx5_esw_flow_attr *attr = flow->attr;
	struct ip_tunnel_info *info = NULL;
	const struct tc_action *a;
	LIST_HEAD(actions);
	bool encap = false;
	int err = 0;

	if (tc_no_actions(exts))
		return -EINVAL;

	memset(attr, 0, sizeof(*attr));
	attr->in_rep = priv->ppriv;

	tcf_exts_to_list(exts, &actions);
	list_for_each_entry(a, &actions, list) {
		if (is_tcf_gact_shot(a)) {
			attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
					MLX5_FLOW_CONTEXT_ACTION_COUNT;
			continue;
		}

		if (is_tcf_mirred_egress_redirect(a)) {
			int ifindex = tcf_mirred_ifindex(a);
			struct net_device *out_dev;
			struct mlx5e_priv *out_priv;

			out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex);

			if (switchdev_port_same_parent_id(priv->netdev,
							  out_dev)) {
				attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
						MLX5_FLOW_CONTEXT_ACTION_COUNT;
				out_priv = netdev_priv(out_dev);
				attr->out_rep = out_priv->ppriv;
			} else if (encap) {
				err = mlx5e_attach_encap(priv, info,
							 out_dev, attr);
				if (err)
					return err;
				list_add(&flow->encap, &attr->encap->flows);
				attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP |
						MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
						MLX5_FLOW_CONTEXT_ACTION_COUNT;
				out_priv = netdev_priv(attr->encap->out_dev);
				attr->out_rep = out_priv->ppriv;
			} else {
				pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
				       priv->netdev->name, out_dev->name);
				return -EINVAL;
			}
			continue;
		}

		if (is_tcf_tunnel_set(a)) {
			info = tcf_tunnel_info(a);
			if (info)
				encap = true;
			else
				return -EOPNOTSUPP;
			continue;
		}

		if (is_tcf_vlan(a)) {
			/* compare against the tc UAPI values, not internal flags */
			if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
				attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
			} else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) {
				if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q))
					return -EOPNOTSUPP;

				attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
				attr->vlan = tcf_vlan_push_vid(a);
			} else {
				return -EOPNOTSUPP;
			}
			continue;
		}

		if (is_tcf_tunnel_release(a)) {
			attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
			continue;
		}

		return -EINVAL;
	}
	return err;
}
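/* Entry point for offloading a flower classifier. For example, a filter
 * along the lines of (illustrative only, device name is a placeholder):
 *
 *   tc filter add dev <rep> protocol ip parent ffff: \
 *      flower skip_sw ip_proto tcp dst_port 80 action drop
 *
 * ends up here, is parsed into a match spec plus actions, and is then
 * installed into either the NIC TC table or the eswitch FDB.
 */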
int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
			   struct tc_cls_flower_offload *f)
{
	struct mlx5e_tc_table *tc = &priv->fs.tc;
	int err, attr_size = 0;
	u32 flow_tag, action;
	struct mlx5e_tc_flow *flow;
	struct mlx5_flow_spec *spec;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	u8 flow_flags = 0;

	if (esw && esw->mode == SRIOV_OFFLOADS) {
		flow_flags = MLX5E_TC_FLOW_ESWITCH;
		attr_size = sizeof(struct mlx5_esw_flow_attr);
	}

	flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
	spec = mlx5_vzalloc(sizeof(*spec));
	if (!spec || !flow) {
		err = -ENOMEM;
		goto err_free;
	}

	flow->cookie = f->cookie;
	flow->flags = flow_flags;

	err = parse_cls_flower(priv, flow, spec, f);
	if (err < 0)
		goto err_free;

	if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
		flow->attr = (struct mlx5_esw_flow_attr *)(flow + 1);
		err = parse_tc_fdb_actions(priv, f->exts, flow);
		if (err < 0)
			goto err_free;
		flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, flow->attr);
	} else {
		err = parse_tc_nic_actions(priv, f->exts, &action, &flow_tag);
		if (err < 0)
			goto err_free;
		flow->rule = mlx5e_tc_add_nic_flow(priv, spec, action, flow_tag);
	}

	if (IS_ERR(flow->rule)) {
		err = PTR_ERR(flow->rule);
		goto err_del_rule;
	}

	err = rhashtable_insert_fast(&tc->ht, &flow->node,
				     tc->ht_params);
	if (err)
		goto err_del_rule;

	goto out;

err_del_rule:
	mlx5e_tc_del_flow(priv, flow);

err_free:
	kfree(flow);
out:
	kvfree(spec);
	return err;
}
int mlx5e_delete_flower(struct mlx5e_priv *priv,
			struct tc_cls_flower_offload *f)
{
	struct mlx5e_tc_flow *flow;
	struct mlx5e_tc_table *tc = &priv->fs.tc;

	flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
				      tc->ht_params);
	if (!flow)
		return -EINVAL;

	rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params);

	mlx5e_tc_del_flow(priv, flow);

	kfree(flow);

	return 0;
}
int mlx5e_stats_flower(struct mlx5e_priv *priv,
		       struct tc_cls_flower_offload *f)
{
	struct mlx5e_tc_table *tc = &priv->fs.tc;
	struct mlx5e_tc_flow *flow;
	struct tc_action *a;
	struct mlx5_fc *counter;
	LIST_HEAD(actions);
	u64 bytes;
	u64 packets;
	u64 lastuse;

	flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
				      tc->ht_params);
	if (!flow)
		return -EINVAL;

	counter = mlx5_flow_rule_counter(flow->rule);
	if (!counter)
		return 0;

	mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);

	preempt_disable();

	tcf_exts_to_list(f->exts, &actions);
	list_for_each_entry(a, &actions, list)
		tcf_action_stats_update(a, bytes, packets, lastuse);

	preempt_enable();

	return 0;
}
static const struct rhashtable_params mlx5e_tc_flow_ht_params = {
	.head_offset = offsetof(struct mlx5e_tc_flow, node),
	.key_offset = offsetof(struct mlx5e_tc_flow, cookie),
	.key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
	.automatic_shrinking = true,
};

int mlx5e_tc_init(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = &priv->fs.tc;

	tc->ht_params = mlx5e_tc_flow_ht_params;
	return rhashtable_init(&tc->ht, &tc->ht_params);
}

static void _mlx5e_tc_del_flow(void *ptr, void *arg)
{
	struct mlx5e_tc_flow *flow = ptr;
	struct mlx5e_priv *priv = arg;

	mlx5e_tc_del_flow(priv, flow);
	kfree(flow);
}

void mlx5e_tc_cleanup(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = &priv->fs.tc;

	rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, priv);

	if (!IS_ERR_OR_NULL(tc->t)) {
		mlx5_destroy_flow_table(tc->t);
		tc->t = NULL;
	}
}