2 * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3 * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
4 * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
5 * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
6 * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the names of the copyright holders nor the names of its
17 * contributors may be used to endorse or promote products derived from
18 * this software without specific prior written permission.
20 * Alternatively, this software may be distributed under the terms of the
21 * GNU General Public License ("GPL") version 2 as published by the Free
22 * Software Foundation.
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
37 #include <linux/kernel.h>
38 #include <linux/types.h>
39 #include <linux/rhashtable.h>
40 #include <linux/bitops.h>
41 #include <linux/in6.h>
42 #include <linux/notifier.h>
43 #include <linux/inetdevice.h>
44 #include <net/netevent.h>
45 #include <net/neighbour.h>
47 #include <net/ip_fib.h>
/* NOTE(review): this excerpt is non-contiguous -- each line carries its
 * original file line number and many intermediate lines (braces, returns,
 * declarations) are missing.  Comments describe the visible logic only.
 */
/* Iterate over every prefix length whose bit is set in the usage bitmap. */
53 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
54 for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
/* Subset test: true when every prefix length set in prefix_usage1 is also
 * set in prefix_usage2 (checked bit by bit).
 */
57 mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1,
58 struct mlxsw_sp_prefix_usage *prefix_usage2)
62 mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) {
63 if (!test_bit(prefix, prefix_usage2->b))
/* Equality: the two usage maps are compared bytewise over the whole struct. */
70 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
71 struct mlxsw_sp_prefix_usage *prefix_usage2)
73 return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
/* True when no prefix length at all is in use (map equals all-zero map). */
77 mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
79 struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } };
81 return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
/* Copy usage map 2 over usage map 1. */
85 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
86 struct mlxsw_sp_prefix_usage *prefix_usage2)
88 memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
/* Clear the whole usage map. */
92 mlxsw_sp_prefix_usage_zero(struct mlxsw_sp_prefix_usage *prefix_usage)
94 memset(prefix_usage, 0, sizeof(*prefix_usage));
/* Mark a single prefix length as used in the bitmap. */
98 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
99 unsigned char prefix_len)
101 set_bit(prefix_len, prefix_usage->b);
/* Mark a single prefix length as unused in the bitmap. */
105 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
106 unsigned char prefix_len)
108 clear_bit(prefix_len, prefix_usage->b);
/* FIB lookup key: address buffer sized for IPv6 (also holds IPv4) plus the
 * prefix length.
 */
111 struct mlxsw_sp_fib_key {
112 unsigned char addr[sizeof(struct in6_addr)];
113 unsigned char prefix_len;
/* How a FIB entry is resolved: forwarded via nexthop(s), local delivery,
 * or trapped to the CPU.
 */
116 enum mlxsw_sp_fib_entry_type {
117 MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
118 MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
119 MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
122 struct mlxsw_sp_nexthop_group;
/* One prefix in the FIB: hashed by its key, holds the list of entries for
 * that prefix and points back to the owning virtual router.
 */
124 struct mlxsw_sp_fib_node {
125 struct list_head entry_list;
126 struct list_head list;
127 struct rhash_head ht_node;
128 struct mlxsw_sp_vr *vr;
129 struct mlxsw_sp_fib_key key;
/* Per-entry parameters; fields not visible in this excerpt. */
132 struct mlxsw_sp_fib_entry_params {
/* A route: member of its node's entry_list, member of its nexthop group's
 * fib_list (via nexthop_group_node).
 */
139 struct mlxsw_sp_fib_entry {
140 struct list_head list;
141 struct mlxsw_sp_fib_node *fib_node;
142 enum mlxsw_sp_fib_entry_type type;
143 struct list_head nexthop_group_node;
144 struct mlxsw_sp_nexthop_group *nh_group;
145 struct mlxsw_sp_fib_entry_params params;
/* Per-VR FIB: rhashtable of nodes, flat node list, and refcounted prefix
 * usage used when (re)choosing an LPM tree.
 */
149 struct mlxsw_sp_fib {
150 struct rhashtable ht;
151 struct list_head node_list;
152 unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
153 struct mlxsw_sp_prefix_usage prefix_usage;
156 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
/* Allocate a FIB, initialize its node rhashtable and node list.
 * Returns ERR_PTR(-ENOMEM) on allocation failure; rhashtable_init()
 * failure unwinds via the err_rhashtable_init label (not visible here).
 */
158 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void)
160 struct mlxsw_sp_fib *fib;
163 fib = kzalloc(sizeof(*fib), GFP_KERNEL);
165 return ERR_PTR(-ENOMEM);
166 err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
168 goto err_rhashtable_init;
169 INIT_LIST_HEAD(&fib->node_list);
/* Tear down a FIB; warns if any nodes are still linked (callers must have
 * flushed the table first).
 */
177 static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
179 WARN_ON(!list_empty(&fib->node_list));
180 rhashtable_destroy(&fib->ht);
/* Scan the device's LPM trees for one with ref_count == 0.  The
 * one_reserved flag apparently lets the first free tree be skipped --
 * TODO confirm against the missing loop body.
 */
184 static struct mlxsw_sp_lpm_tree *
185 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp, bool one_reserved)
/* NOTE(review): a `static` local in an iterator helper looks wrong (shared
 * state across calls); likely an excerpt artifact -- should be a plain
 * local.  Left untouched here.
 */
187 static struct mlxsw_sp_lpm_tree *lpm_tree;
190 for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
191 lpm_tree = &mlxsw_sp->router.lpm_trees[i];
192 if (lpm_tree->ref_count == 0) {
194 one_reserved = false;
/* Program RALTA to allocate the tree in hardware for the tree's protocol. */
202 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
203 struct mlxsw_sp_lpm_tree *lpm_tree)
205 char ralta_pl[MLXSW_REG_RALTA_LEN];
207 mlxsw_reg_ralta_pack(ralta_pl, true,
208 (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
210 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
/* Program RALTA with alloc=false to release the tree in hardware. */
213 static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
214 struct mlxsw_sp_lpm_tree *lpm_tree)
216 char ralta_pl[MLXSW_REG_RALTA_LEN];
218 mlxsw_reg_ralta_pack(ralta_pl, false,
219 (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
221 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
/* Describe the tree structure to hardware via RALST: walk the used prefix
 * lengths in ascending bit order, chaining each bin to the previously
 * written one (last_prefix), leaf bins marked NO_CHILD.
 */
225 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
226 struct mlxsw_sp_prefix_usage *prefix_usage,
227 struct mlxsw_sp_lpm_tree *lpm_tree)
229 char ralst_pl[MLXSW_REG_RALST_LEN];
232 u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
/* First pass presumably determines root_bin -- body not visible. */
234 mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
237 mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
238 mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
241 mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
242 MLXSW_REG_RALST_BIN_NO_CHILD);
243 last_prefix = prefix;
245 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
/* Create an LPM tree: grab an unused slot, allocate it in HW (RALTA),
 * program its structure (RALST), then cache the prefix usage in the tree.
 * Unwinds with mlxsw_sp_lpm_tree_free() on structure-set failure.
 * Returns ERR_PTR(-EBUSY) when no free tree slot exists.
 */
248 static struct mlxsw_sp_lpm_tree *
249 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
250 struct mlxsw_sp_prefix_usage *prefix_usage,
251 enum mlxsw_sp_l3proto proto, bool one_reserved)
253 struct mlxsw_sp_lpm_tree *lpm_tree;
256 lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp, one_reserved);
258 return ERR_PTR(-EBUSY);
259 lpm_tree->proto = proto;
260 err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
264 err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
267 goto err_left_struct_set;
268 memcpy(&lpm_tree->prefix_usage, prefix_usage,
269 sizeof(lpm_tree->prefix_usage));
273 mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
/* Destroy is just the HW free; the slot becomes reusable (ref_count 0). */
277 static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
278 struct mlxsw_sp_lpm_tree *lpm_tree)
280 return mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
/* Get a tree matching proto + exact prefix usage: reuse a live one when
 * the usage maps compare equal, otherwise create a new tree.  On success
 * the tree's reference count is bumped.
 */
283 static struct mlxsw_sp_lpm_tree *
284 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
285 struct mlxsw_sp_prefix_usage *prefix_usage,
286 enum mlxsw_sp_l3proto proto, bool one_reserved)
288 struct mlxsw_sp_lpm_tree *lpm_tree;
291 for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
292 lpm_tree = &mlxsw_sp->router.lpm_trees[i];
293 if (lpm_tree->ref_count != 0 &&
294 lpm_tree->proto == proto &&
295 mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
299 lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage,
300 proto, one_reserved)
301 if (IS_ERR(lpm_tree))
305 lpm_tree->ref_count++;
/* Drop a reference; last put destroys the HW tree. */
309 static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
310 struct mlxsw_sp_lpm_tree *lpm_tree)
312 if (--lpm_tree->ref_count == 0)
313 return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
/* Assign hardware tree IDs to the local LPM tree array, offset by
 * MLXSW_SP_LPM_TREE_MIN (low IDs are reserved by the device).
 */
317 static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
319 struct mlxsw_sp_lpm_tree *lpm_tree;
322 for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
323 lpm_tree = &mlxsw_sp->router.lpm_trees[i];
324 lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
/* Linear scan of the virtual-router table for a free (unused) slot. */
328 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
330 struct mlxsw_sp_vr *vr;
333 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
334 vr = &mlxsw_sp->router.vrs[i];
/* Bind the VR to its currently selected LPM tree via RALTB. */
341 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
342 struct mlxsw_sp_vr *vr)
344 char raltb_pl[MLXSW_REG_RALTB_LEN];
346 mlxsw_reg_raltb_pack(raltb_pl, vr->id,
347 (enum mlxsw_reg_ralxx_protocol) vr->proto,
349 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
/* Unbind by rebinding the VR to tree 0, the device default. */
352 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
353 struct mlxsw_sp_vr *vr)
355 char raltb_pl[MLXSW_REG_RALTB_LEN];
357 /* Bind to tree 0 which is default */
358 mlxsw_reg_raltb_pack(raltb_pl, vr->id,
359 (enum mlxsw_reg_ralxx_protocol) vr->proto, 0);
360 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
/* Normalize a kernel routing table ID: the local table is folded into the
 * main table since the device does not distinguish them.
 */
363 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
365 /* For our purpose, squash main and local table into one */
366 if (tb_id == RT_TABLE_LOCAL)
367 tb_id = RT_TABLE_MAIN;
/* Find a live VR matching (normalized) table ID and protocol. */
371 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
373 enum mlxsw_sp_l3proto proto)
375 struct mlxsw_sp_vr *vr;
378 tb_id = mlxsw_sp_fix_tb_id(tb_id);
380 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
381 vr = &mlxsw_sp->router.vrs[i];
382 if (vr->used && vr->proto == proto && vr->tb_id == tb_id)
/* Create a VR: claim an unused slot (-EBUSY when exhausted), create its
 * FIB, request an LPM tree seeded with just the initial prefix length,
 * and bind the VR to that tree.  Error paths unwind tree ref and FIB.
 */
388 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
389 unsigned char prefix_len,
391 enum mlxsw_sp_l3proto proto)
393 struct mlxsw_sp_prefix_usage req_prefix_usage;
394 struct mlxsw_sp_lpm_tree *lpm_tree;
395 struct mlxsw_sp_vr *vr;
398 vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
400 return ERR_PTR(-EBUSY);
401 vr->fib = mlxsw_sp_fib_create();
403 return ERR_CAST(vr->fib);
407 mlxsw_sp_prefix_usage_zero(&req_prefix_usage);
408 mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
409 lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
411 if (IS_ERR(lpm_tree)) {
412 err = PTR_ERR(lpm_tree);
415 vr->lpm_tree = lpm_tree;
416 err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
424 mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
426 mlxsw_sp_fib_destroy(vr->fib);
/* Destroy: unbind from HW, drop the tree reference, free the FIB. */
431 static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
432 struct mlxsw_sp_vr *vr)
434 mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
435 mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
436 mlxsw_sp_fib_destroy(vr->fib);
/* Ensure the VR's LPM tree covers the required prefix usage.  Fast path:
 * current tree already matches exactly.  Otherwise try to get a new tree;
 * if that fails but the requirement is a subset of the current tree's
 * usage, the current tree stays good and we succeed anyway.
 */
441 mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
442 struct mlxsw_sp_prefix_usage *req_prefix_usage)
444 struct mlxsw_sp_lpm_tree *lpm_tree = vr->lpm_tree;
445 struct mlxsw_sp_lpm_tree *new_tree;
448 if (mlxsw_sp_prefix_usage_eq(req_prefix_usage, &lpm_tree->prefix_usage))
451 new_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage,
453 if (IS_ERR(new_tree)) {
454 /* We failed to get a tree according to the required
455 * prefix usage. However, the current tree might be still good
456 * for us if our requirement is subset of the prefixes used
459 if (mlxsw_sp_prefix_usage_subset(req_prefix_usage,
460 &lpm_tree->prefix_usage))
462 return PTR_ERR(new_tree);
465 /* Prevent packet loss by overwriting existing binding */
466 vr->lpm_tree = new_tree;
467 err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
/* Success path: release the reference on the old tree. */
470 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
/* Error path (label not visible): restore the old tree pointer and drop
 * the freshly acquired one.
 */
475 vr->lpm_tree = lpm_tree;
476 mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
/* Get (find-or-create) a VR for the table/protocol.  When reusing an
 * existing VR, extend its current prefix usage with the new prefix_len
 * and swap LPM trees if the current tree cannot accommodate it.
 */
480 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp,
481 unsigned char prefix_len,
483 enum mlxsw_sp_l3proto proto)
485 struct mlxsw_sp_vr *vr;
488 tb_id = mlxsw_sp_fix_tb_id(tb_id);
489 vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id, proto);
491 vr = mlxsw_sp_vr_create(mlxsw_sp, prefix_len, tb_id, proto);
495 struct mlxsw_sp_prefix_usage req_prefix_usage;
497 mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
498 &vr->fib->prefix_usage);
499 mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
500 /* Need to replace LPM tree in case new prefix is required. */
501 err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
/* Release a VR reference-wise: destroy when its FIB is empty, otherwise
 * re-check the tree so a smaller tree can be adopted if usage shrank.
 */
509 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
511 /* Destroy virtual router entity in case the associated FIB is empty
512 * and allow it to be used for other tables in future. Otherwise,
513 * check if some prefix usage did not disappear and change tree if
514 * that is the case. Note that in case new, smaller tree cannot be
515 * allocated, the original one will be kept being used.
517 if (mlxsw_sp_prefix_usage_none(&vr->fib->prefix_usage))
518 mlxsw_sp_vr_destroy(mlxsw_sp, vr);
520 mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
521 &vr->fib->prefix_usage);
/* Allocate the VR array sized by the device's MAX_VRS resource; bails out
 * early (error path not visible) when the resource is not reported.
 */
524 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
526 struct mlxsw_sp_vr *vr;
530 if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
533 max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
534 mlxsw_sp->router.vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
536 if (!mlxsw_sp->router.vrs)
539 for (i = 0; i < max_vrs; i++) {
540 vr = &mlxsw_sp->router.vrs[i];
547 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
/* Teardown: drain the ordered workqueue of pending FIB events first, then
 * flush the device tables and free the VR array.
 */
549 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
551 /* At this stage we're guaranteed not to have new incoming
552 * FIB notifications and the work queue is free from FIBs
553 * sitting on top of mlxsw netdevs. However, we can still
554 * have other FIBs queued. Flush the queue before flushing
555 * the device's tables. No need for locks, as we're the only
558 mlxsw_core_flush_owq();
559 mlxsw_sp_router_fib_flush(mlxsw_sp);
560 kfree(mlxsw_sp->router.vrs);
/* Hash key for neighbour entries; fields not visible in this excerpt
 * (presumably the struct neighbour pointer, given key.n usage below).
 */
563 struct mlxsw_sp_neigh_key {
/* Cached neighbour: linked on its RIF's neigh list, hashed by key, and on
 * the router's nexthop_neighs_list when nexthops use it.
 */
567 struct mlxsw_sp_neigh_entry {
568 struct list_head rif_list_node;
569 struct rhash_head ht_node;
570 struct mlxsw_sp_neigh_key key;
573 unsigned char ha[ETH_ALEN];
574 struct list_head nexthop_list; /* list of nexthops using
577 struct list_head nexthop_neighs_list_node;
580 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
581 .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
582 .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
583 .key_len = sizeof(struct mlxsw_sp_neigh_key),
/* Allocate and minimally initialize a neighbour entry (key, RIF, empty
 * nexthop list).  Caller owns the entry until inserted.
 */
586 static struct mlxsw_sp_neigh_entry *
587 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
590 struct mlxsw_sp_neigh_entry *neigh_entry;
592 neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
596 neigh_entry->key.n = n;
597 neigh_entry->rif = rif;
598 INIT_LIST_HEAD(&neigh_entry->nexthop_list);
/* Free an entry previously allocated above. */
603 static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
/* Hashtable insert/remove wrappers keyed by mlxsw_sp_neigh_key. */
609 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
610 struct mlxsw_sp_neigh_entry *neigh_entry)
612 return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht,
613 &neigh_entry->ht_node,
614 mlxsw_sp_neigh_ht_params);
618 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
619 struct mlxsw_sp_neigh_entry *neigh_entry)
621 rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht,
622 &neigh_entry->ht_node,
623 mlxsw_sp_neigh_ht_params);
/* Create a neighbour entry for 'n': resolve the RIF from n->dev (-EINVAL
 * when the netdev has no RIF), allocate, insert into the hashtable, and
 * link onto the RIF's neigh list.  Insert failure frees the entry.
 */
626 static struct mlxsw_sp_neigh_entry *
627 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
629 struct mlxsw_sp_neigh_entry *neigh_entry;
630 struct mlxsw_sp_rif *r;
633 r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
635 return ERR_PTR(-EINVAL);
637 neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, r->rif);
639 return ERR_PTR(-ENOMEM);
641 err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
643 goto err_neigh_entry_insert;
645 list_add(&neigh_entry->rif_list_node, &r->neigh_list);
649 err_neigh_entry_insert:
650 mlxsw_sp_neigh_entry_free(neigh_entry);
/* Reverse of create: unlink from RIF list, remove from hashtable, free. */
655 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
656 struct mlxsw_sp_neigh_entry *neigh_entry)
658 list_del(&neigh_entry->rif_list_node);
659 mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
660 mlxsw_sp_neigh_entry_free(neigh_entry);
/* Lookup by struct neighbour pointer via a stack-built key. */
663 static struct mlxsw_sp_neigh_entry *
664 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
666 struct mlxsw_sp_neigh_key key;
669 return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht,
670 &key, mlxsw_sp_neigh_ht_params);
/* Seed the activity-dump interval from the ARP table's DELAY_PROBE_TIME. */
674 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
676 unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
678 mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval);
/* Process one IPv4 entry from a RAUHTD activity dump: map the HW RIF index
 * to a netdev, look up the kernel neighbour for the DIP, and poke it with
 * neigh_event_send() so the kernel keeps it alive.
 */
681 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
685 struct net_device *dev;
691 mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
693 if (!mlxsw_sp->rifs[rif]) {
694 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
699 dev = mlxsw_sp->rifs[rif]->dev;
700 n = neigh_lookup(&arp_tbl, &dipn, dev);
702 netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
707 netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
708 neigh_event_send(n, NULL);
/* Walk every entry packed into one IPv4 dump record. */
712 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
719 num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
721 /* Hardware starts counting at 0, so add 1. */
724 /* Each record consists of several neighbour entries. */
725 for (i = 0; i < num_entries; i++) {
728 ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
729 mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
/* Dispatch a dump record by type; IPv6 branch is present but its handling
 * is not visible in this excerpt.
 */
735 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
736 char *rauhtd_pl, int rec_index)
738 switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
739 case MLXSW_REG_RAUHTD_TYPE_IPV4:
740 mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
743 case MLXSW_REG_RAUHTD_TYPE_IPV6:
/* Heuristic for "more records pending": the dump is full when the max
 * record count was returned and the last record is completely populated
 * (an IPv6 last record counts as full outright).
 */
749 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
751 u8 num_rec, last_rec_index, num_entries;
753 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
754 last_rec_index = num_rec - 1;
756 if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
758 if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
759 MLXSW_REG_RAUHTD_TYPE_IPV6)
762 num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
764 if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
/* Drain the hardware neighbour-activity table: repeatedly query RAUHTD
 * and process each returned record until a non-full dump indicates the
 * table is drained.  Runs under RTNL (taken in the elided lines) so the
 * neighbours' netdevs cannot disappear mid-walk.
 * NOTE(review): "talbe" typo in the error string below -- runtime string,
 * deliberately left untouched in this documentation pass.
 */
769 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
775 rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
779 /* Make sure the neighbour's netdev isn't removed in the
784 mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4);
785 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
788 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour talbe\n");
791 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
792 for (i = 0; i < num_rec; i++)
793 mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
795 } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
/* Keep neighbours that back nexthops alive: under RTNL, poke each one so
 * the kernel treats it as active even without CPU-visible traffic.
 */
802 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
804 struct mlxsw_sp_neigh_entry *neigh_entry;
806 /* Take RTNL mutex here to prevent lists from changes */
808 list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
809 nexthop_neighs_list_node)
810 /* If this neigh have nexthops, make the kernel think this neigh
811 * is active regardless of the traffic.
813 neigh_event_send(neigh_entry->key.n, NULL);
/* Re-arm the activity-update delayed work using the current interval. */
818 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
820 unsigned long interval = mlxsw_sp->router.neighs_update.interval;
822 mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw,
823 msecs_to_jiffies(interval));
/* Periodic work: dump HW activity, refresh nexthop neighbours, re-arm. */
826 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
828 struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
829 router.neighs_update.dw.work);
832 err = mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp);
834 dev_err(mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
836 mlxsw_sp_router_neighs_update_nh(mlxsw_sp);
838 mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp);
/* Periodic work: ARP-probe unresolved nexthop neighbours to break the
 * "never resolved because HW forwards around it" chicken-and-egg cycle.
 */
841 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
843 struct mlxsw_sp_neigh_entry *neigh_entry;
844 struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
845 router.nexthop_probe_dw.work);
847 /* Iterate over nexthop neighbours, find those who are unresolved and
848 * send arp on them. This solves the chicken-egg problem when
849 * the nexthop wouldn't get offloaded until the neighbor is resolved
850 * but it wouldn't get resolved ever in case traffic is flowing in HW
851 * using different nexthop.
853 * Take RTNL mutex here to prevent lists from changes.
856 list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
857 nexthop_neighs_list_node)
858 if (!neigh_entry->connected)
859 neigh_event_send(neigh_entry->key.n, NULL);
862 mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw,
863 MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
/* Forward declaration: defined later; updates nexthops that reference the
 * neighbour when its state changes.
 */
867 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
868 struct mlxsw_sp_neigh_entry *neigh_entry,
/* Translate add/remove into the RAUHT write opcode. */
871 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
873 return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
874 MLXSW_REG_RAUHT_OP_WRITE_DELETE;
/* Program one IPv4 neighbour (RIF, MAC, DIP) into the HW unicast host
 * table via RAUHT.  NOTE(review): the mlxsw_reg_write() return value is
 * discarded here -- cannot tell from the excerpt whether the enclosing
 * (elided) code handles it; verify against the full source.
 */
878 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
879 struct mlxsw_sp_neigh_entry *neigh_entry,
880 enum mlxsw_reg_rauht_op op)
882 struct neighbour *n = neigh_entry->key.n;
883 u32 dip = ntohl(*((__be32 *) n->primary_key));
884 char rauht_pl[MLXSW_REG_RAUHT_LEN];
886 mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
888 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
/* Sync a neighbour's connected state to HW; no-op when removing an entry
 * that was never connected.  Only ARP (IPv4) neighbours are programmed.
 */
892 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
893 struct mlxsw_sp_neigh_entry *neigh_entry,
896 if (!adding && !neigh_entry->connected)
898 neigh_entry->connected = adding;
899 if (neigh_entry->key.n->tbl == &arp_tbl)
900 mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
901 mlxsw_sp_rauht_op(adding));
/* Deferred context for a netevent neighbour update: work item, driver
 * instance, and (in elided fields) the struct neighbour pointer 'n'.
 */
906 struct mlxsw_sp_neigh_event_work {
907 struct work_struct work;
908 struct mlxsw_sp *mlxsw_sp;
/* Process a neighbour update in process context: snapshot MAC and NUD
 * state under the neigh lock, then create/update/destroy our cached entry
 * and propagate the change to HW and to dependent nexthops.
 */
912 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
914 struct mlxsw_sp_neigh_event_work *neigh_work =
915 container_of(work, struct mlxsw_sp_neigh_event_work, work);
916 struct mlxsw_sp *mlxsw_sp = neigh_work->mlxsw_sp;
917 struct mlxsw_sp_neigh_entry *neigh_entry;
918 struct neighbour *n = neigh_work->n;
919 unsigned char ha[ETH_ALEN];
920 bool entry_connected;
923 /* If these parameters are changed after we release the lock,
924 * then we are guaranteed to receive another event letting us
927 read_lock_bh(&n->lock);
928 memcpy(ha, n->ha, ETH_ALEN);
929 nud_state = n->nud_state;
/* 'dead' is presumably n->dead, read in the elided line under the same
 * lock -- TODO confirm against the full source.
 */
931 read_unlock_bh(&n->lock);
934 entry_connected = nud_state & NUD_VALID && !dead;
935 neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
936 if (!entry_connected && !neigh_entry)
939 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
940 if (IS_ERR(neigh_entry))
944 memcpy(neigh_entry->ha, ha, ETH_ALEN);
945 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
946 mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
/* Garbage-collect entries that are disconnected and unused by nexthops. */
948 if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
949 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
/* Netevent notifier (atomic context).  DELAY_PROBE_TIME updates refresh
 * the activity-dump interval; NEIGH_UPDATE events are deferred to
 * mlxsw_sp_router_neigh_event_work() since sleeping is not allowed here.
 * Only ARP-table (IPv4) events on mlxsw-backed netdevs are of interest.
 */
957 int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
958 unsigned long event, void *ptr)
960 struct mlxsw_sp_neigh_event_work *neigh_work;
961 struct mlxsw_sp_port *mlxsw_sp_port;
962 struct mlxsw_sp *mlxsw_sp;
963 unsigned long interval;
964 struct neigh_parms *p;
968 case NETEVENT_DELAY_PROBE_TIME_UPDATE:
971 /* We don't care about changes in the default table. */
972 if (!p->dev || p->tbl != &arp_tbl)
975 /* We are in atomic context and can't take RTNL mutex,
976 * so use RCU variant to walk the device chain.
978 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
982 mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
983 interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
984 mlxsw_sp->router.neighs_update.interval = interval;
986 mlxsw_sp_port_dev_put(mlxsw_sp_port);
988 case NETEVENT_NEIGH_UPDATE:
991 if (n->tbl != &arp_tbl)
994 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
/* GFP_ATOMIC: still in notifier (atomic) context. */
998 neigh_work = kzalloc(sizeof(*neigh_work), GFP_ATOMIC);
1000 mlxsw_sp_port_dev_put(mlxsw_sp_port);
1004 INIT_WORK(&neigh_work->work, mlxsw_sp_router_neigh_event_work);
1005 neigh_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
1008 /* Take a reference to ensure the neighbour won't be
1009 * destructed until we drop the reference in delayed
1013 mlxsw_core_schedule_work(&neigh_work->work);
1014 mlxsw_sp_port_dev_put(mlxsw_sp_port);
/* Router neighbour subsystem init: hashtable, polling interval, and the
 * two delayed works (activity update + unresolved-nexthop probing), both
 * kicked off immediately.
 */
1021 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
1025 err = rhashtable_init(&mlxsw_sp->router.neigh_ht,
1026 &mlxsw_sp_neigh_ht_params);
1030 /* Initialize the polling interval according to the default
1033 mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
1035 /* Create the delayed works for the activity_update */
1036 INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw,
1037 mlxsw_sp_router_neighs_update_work);
1038 INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw,
1039 mlxsw_sp_router_probe_unresolved_nexthops);
1040 mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0);
1041 mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0);
/* Teardown: cancel both works (waiting for in-flight runs) then destroy
 * the hashtable.
 */
1045 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
1047 cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw);
1048 cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw);
1049 rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
/* Ask HW to delete all host entries bound to the RIF in one RAUHT op. */
1052 static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp,
1053 const struct mlxsw_sp_rif *r)
1055 char rauht_pl[MLXSW_REG_RAUHT_LEN];
1057 mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL,
1059 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
/* RIF going away: flush its HW host entries, then destroy every cached
 * neighbour entry still on its list (safe iteration -- entries unlink).
 */
1062 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
1063 struct mlxsw_sp_rif *r)
1065 struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
1067 mlxsw_sp_neigh_rif_flush(mlxsw_sp, r);
1068 list_for_each_entry_safe(neigh_entry, tmp, &r->neigh_list,
1070 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
/* Nexthop hash key: the kernel's fib_nh this nexthop mirrors. */
1073 struct mlxsw_sp_nexthop_key {
1074 struct fib_nh *fib_nh;
/* One member of an ECMP nexthop group; tracks its neighbour, RIF, and
 * three state bits governing KVD linear (adjacency) programming.
 */
1077 struct mlxsw_sp_nexthop {
1078 struct list_head neigh_list_node; /* member of neigh entry list */
1079 struct list_head rif_list_node;
1080 struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
1083 struct rhash_head ht_node;
1084 struct mlxsw_sp_nexthop_key key;
1085 struct mlxsw_sp_rif *r;
1086 u8 should_offload:1, /* set indicates this neigh is connected and
1087 * should be put to KVD linear area of this group.
1089 offloaded:1, /* set in case the neigh is actually put into
1090 * KVD linear area of this group.
1092 update:1; /* set indicates that MAC of this neigh should be
1095 struct mlxsw_sp_neigh_entry *neigh_entry;
/* Group hash key: the kernel fib_info shared by routes using the group. */
1098 struct mlxsw_sp_nexthop_group_key {
1099 struct fib_info *fi;
/* ECMP group: hashed by key, lists its fib entries, and embeds its
 * nexthops as a flexible array.  nh_rif aliases the first nexthop's RIF.
 */
1102 struct mlxsw_sp_nexthop_group {
1103 struct rhash_head ht_node;
1104 struct list_head fib_list; /* list of fib entries that use this group */
1105 struct mlxsw_sp_nexthop_group_key key;
1106 u8 adj_index_valid:1,
1107 gateway:1; /* routes using the group use a gateway */
1111 struct mlxsw_sp_nexthop nexthops[0];
1112 #define nh_rif nexthops[0].r
1115 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
1116 .key_offset = offsetof(struct mlxsw_sp_nexthop_group, key),
1117 .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
1118 .key_len = sizeof(struct mlxsw_sp_nexthop_group_key),
/* Group hashtable insert/remove/lookup wrappers. */
1121 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
1122 struct mlxsw_sp_nexthop_group *nh_grp)
1124 return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_group_ht,
1126 mlxsw_sp_nexthop_group_ht_params);
1129 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
1130 struct mlxsw_sp_nexthop_group *nh_grp)
1132 rhashtable_remove_fast(&mlxsw_sp->router.nexthop_group_ht,
1134 mlxsw_sp_nexthop_group_ht_params);
1137 static struct mlxsw_sp_nexthop_group *
1138 mlxsw_sp_nexthop_group_lookup(struct mlxsw_sp *mlxsw_sp,
1139 struct mlxsw_sp_nexthop_group_key key)
1141 return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_group_ht, &key,
1142 mlxsw_sp_nexthop_group_ht_params);
1145 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
1146 .key_offset = offsetof(struct mlxsw_sp_nexthop, key),
1147 .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
1148 .key_len = sizeof(struct mlxsw_sp_nexthop_key),
/* Individual-nexthop hashtable insert/remove/lookup wrappers. */
1151 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
1152 struct mlxsw_sp_nexthop *nh)
1154 return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_ht,
1155 &nh->ht_node, mlxsw_sp_nexthop_ht_params);
1158 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
1159 struct mlxsw_sp_nexthop *nh)
1161 rhashtable_remove_fast(&mlxsw_sp->router.nexthop_ht, &nh->ht_node,
1162 mlxsw_sp_nexthop_ht_params);
1165 static struct mlxsw_sp_nexthop *
1166 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
1167 struct mlxsw_sp_nexthop_key key)
1169 return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_ht, &key,
1170 mlxsw_sp_nexthop_ht_params);
/* Tell one VR (via RALEU) to rewrite routes pointing at the old adjacency
 * block so they point at the new one.
 */
1173 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
1174 struct mlxsw_sp_vr *vr,
1175 u32 adj_index, u16 ecmp_size,
1179 char raleu_pl[MLXSW_REG_RALEU_LEN];
1181 mlxsw_reg_raleu_pack(raleu_pl,
1182 (enum mlxsw_reg_ralxx_protocol) vr->proto, vr->id,
1183 adj_index, ecmp_size, new_adj_index,
1185 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
/* Apply the mass update once per distinct VR among the group's fib
 * entries; the 'vr ==' check skips consecutive entries in the same VR
 * (the fib_list appears grouped by VR -- TODO confirm).
 */
1188 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
1189 struct mlxsw_sp_nexthop_group *nh_grp,
1190 u32 old_adj_index, u16 old_ecmp_size)
1192 struct mlxsw_sp_fib_entry *fib_entry;
1193 struct mlxsw_sp_vr *vr = NULL;
1196 list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
1197 if (vr == fib_entry->fib_node->vr)
1199 vr = fib_entry->fib_node->vr;
1200 err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr,
/* Write one adjacency entry (RATR): RIF + neighbour MAC at adj_index. */
1211 static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
1212 struct mlxsw_sp_nexthop *nh)
1214 struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
1215 char ratr_pl[MLXSW_REG_RATR_LEN];
1217 mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
1218 true, adj_index, neigh_entry->rif);
1219 mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
1220 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
/* Walk the group's nexthops from the base adjacency index, skipping
 * non-offloadable members, rewriting entries that are flagged for update
 * or when the whole block was reallocated.
 */
1224 mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
1225 struct mlxsw_sp_nexthop_group *nh_grp,
1228 u32 adj_index = nh_grp->adj_index; /* base */
1229 struct mlxsw_sp_nexthop *nh;
1233 for (i = 0; i < nh_grp->count; i++) {
1234 nh = &nh_grp->nexthops[i];
1236 if (!nh->should_offload) {
1241 if (nh->update || reallocate) {
1242 err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
/* Forward declaration used by the refresh path below. */
1254 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1255 struct mlxsw_sp_fib_entry *fib_entry);
/* Re-program every fib entry using this group (e.g. after the group's
 * adjacency state changed); stops at the first error (handling elided).
 */
1258 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
1259 struct mlxsw_sp_nexthop_group *nh_grp)
1261 struct mlxsw_sp_fib_entry *fib_entry;
1264 list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
1265 err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
/* Bring the hardware view of a nexthop group in sync with its software
 * state: decide which nexthops are offloadable, (re)allocate the KVD
 * linear adjacency area when membership changed, rewrite the adjacency
 * entries, and re-program the FIB entries using the group. On any
 * failure the group falls back to trapping traffic to the kernel.
 */
1273 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
1274 struct mlxsw_sp_nexthop_group *nh_grp)
1276 struct mlxsw_sp_nexthop *nh;
1277 bool offload_change = false;
1280 bool old_adj_index_valid;
/* Non-gateway groups (directly connected routes) have no adjacency
 * entries; just refresh their FIB entries.
 */
1287 if (!nh_grp->gateway) {
1288 mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
/* Detect membership changes: a nexthop whose desired offload state
 * differs from its current one means the adjacency area must be
 * reallocated with a new ECMP size.
 */
1292 for (i = 0; i < nh_grp->count; i++) {
1293 nh = &nh_grp->nexthops[i];
1295 if (nh->should_offload ^ nh->offloaded) {
1296 offload_change = true;
1297 if (nh->should_offload)
1300 if (nh->should_offload)
1303 if (!offload_change) {
1304 /* Nothing was added or removed, so no need to reallocate. Just
1305 * update MAC on existing adjacency indexes.
1307 err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp,
1310 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
1316 /* No neigh of this group is connected so we just set
1317 * the trap and let everything flow through kernel.
1321 ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size);
1323 /* We ran out of KVD linear space, just set the
1324 * trap and let everything flow through kernel.
1326 dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
/* Switch the group to the freshly allocated adjacency area, then
 * populate it before retargeting the FIB entries.
 */
1330 old_adj_index_valid = nh_grp->adj_index_valid;
1331 old_adj_index = nh_grp->adj_index;
1332 old_ecmp_size = nh_grp->ecmp_size;
1333 nh_grp->adj_index_valid = 1;
1334 nh_grp->adj_index = adj_index;
1335 nh_grp->ecmp_size = ecmp_size;
1336 err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp, true);
1338 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
1342 if (!old_adj_index_valid) {
1343 /* The trap was set for fib entries, so we have to call
1344 * fib entry update to unset it and use adjacency index.
1346 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1348 dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
/* Group previously had a valid adjacency area: mass-update existing
 * FIB entries to the new index, then release the old area.
 */
1354 err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
1355 old_adj_index, old_ecmp_size);
1356 mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
1358 dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
/* Error fallback: invalidate the adjacency index, flip FIB entries to
 * trap, and free the adjacency area if one was held.
 */
1364 old_adj_index_valid = nh_grp->adj_index_valid;
1365 nh_grp->adj_index_valid = 0;
1366 for (i = 0; i < nh_grp->count; i++) {
1367 nh = &nh_grp->nexthops[i];
1370 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1372 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
1373 if (old_adj_index_valid)
1374 mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
/* Flip a single nexthop's desired offload state in response to a
 * neighbour becoming reachable ('removing' false) or going away
 * ('removing' true). The actual hardware update happens in the
 * group refresh.
 */
1377 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
1380 if (!removing && !nh->should_offload)
1381 nh->should_offload = 1;
1382 else if (removing && nh->offloaded)
1383 nh->should_offload = 0;
/* Propagate a neighbour state change to every nexthop that uses it and
 * refresh each affected nexthop group.
 */
1388 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
1389 struct mlxsw_sp_neigh_entry *neigh_entry,
1392 struct mlxsw_sp_nexthop *nh;
1394 list_for_each_entry(nh, &neigh_entry->nexthop_list,
1396 __mlxsw_sp_nexthop_neigh_update(nh, removing);
1397 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
/* Link a nexthop to its router interface (RIF) so RIF teardown can find
 * and clean up dependent nexthops.
 */
1401 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
1402 struct mlxsw_sp_rif *r)
1408 list_add(&nh->rif_list_node, &r->nexthop_list);
/* Unlink the nexthop from its RIF. */
1411 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
1416 list_del(&nh->rif_list_node);
/* Resolve (or create) the neighbour for a gateway nexthop, attach the
 * nexthop to the corresponding neigh entry, and set the initial offload
 * state from the neighbour's NUD state.
 */
1420 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
1421 struct mlxsw_sp_nexthop *nh)
1423 struct mlxsw_sp_neigh_entry *neigh_entry;
1424 struct fib_nh *fib_nh = nh->key.fib_nh;
1425 struct neighbour *n;
/* Nothing to do for non-gateway groups or if already attached. */
1429 if (!nh->nh_grp->gateway || nh->neigh_entry)
1432 /* Take a reference of neigh here ensuring that neigh would
1433 * not be destructed before the nexthop entry is finished.
1434 * The reference is taken either in neigh_lookup() or
1435 * in neigh_create() in case n is not found.
1437 n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
1439 n = neigh_create(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
/* Kick off neighbour resolution so the entry becomes reachable. */
1442 neigh_event_send(n, NULL);
1444 neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
1446 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
1447 if (IS_ERR(neigh_entry)) {
1449 goto err_neigh_entry_create;
1453 /* If that is the first nexthop connected to that neigh, add to
1454 * nexthop_neighs_list
1456 if (list_empty(&neigh_entry->nexthop_list))
1457 list_add_tail(&neigh_entry->nexthop_neighs_list_node,
1458 &mlxsw_sp->router.nexthop_neighs_list);
1460 nh->neigh_entry = neigh_entry;
1461 list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
/* Sample NUD state under the neighbour lock to decide the initial
 * offload state.
 */
1462 read_lock_bh(&n->lock);
1463 nud_state = n->nud_state;
1465 read_unlock_bh(&n->lock);
1466 __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
1470 err_neigh_entry_create:
/* Detach a nexthop from its neigh entry; destroy the neigh entry when
 * it is disconnected and no longer used by any nexthop.
 */
1475 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
1476 struct mlxsw_sp_nexthop *nh)
1478 struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
1479 struct neighbour *n;
1483 n = neigh_entry->key.n;
/* Mark the nexthop as no longer offloadable before unlinking. */
1485 __mlxsw_sp_nexthop_neigh_update(nh, true);
1486 list_del(&nh->neigh_list_node);
1487 nh->neigh_entry = NULL;
1489 /* If that is the last nexthop connected to that neigh, remove from
1490 * nexthop_neighs_list
1492 if (list_empty(&neigh_entry->nexthop_list))
1493 list_del(&neigh_entry->nexthop_neighs_list_node);
1495 if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
1496 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
/* Initialize one nexthop within a group: register it in the nexthop
 * hash, honour the ignore-routes-with-linkdown sysctl, bind it to its
 * RIF and resolve its neighbour.
 */
1501 static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
1502 struct mlxsw_sp_nexthop_group *nh_grp,
1503 struct mlxsw_sp_nexthop *nh,
1504 struct fib_nh *fib_nh)
1506 struct net_device *dev = fib_nh->nh_dev;
1507 struct in_device *in_dev;
1508 struct mlxsw_sp_rif *r;
1511 nh->nh_grp = nh_grp;
1512 nh->key.fib_nh = fib_nh;
1513 err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
/* Respect IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN: skip dead nexthops. */
1517 in_dev = __in_dev_get_rtnl(dev);
1518 if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
1519 fib_nh->nh_flags & RTNH_F_LINKDOWN)
1522 r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
1525 mlxsw_sp_nexthop_rif_init(nh, r);
1527 err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
1529 goto err_nexthop_neigh_init;
1533 err_nexthop_neigh_init:
1534 mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
/* Tear down a nexthop: reverse of mlxsw_sp_nexthop_init(). */
1538 static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
1539 struct mlxsw_sp_nexthop *nh)
1541 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
1542 mlxsw_sp_nexthop_rif_fini(nh);
1543 mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
/* Handle FIB_EVENT_NH_ADD/DEL for a single nexthop: (de)initialize its
 * RIF/neighbour state and refresh the owning group. No-op once the
 * router is in aborted state.
 */
1546 static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp,
1547 unsigned long event, struct fib_nh *fib_nh)
1549 struct mlxsw_sp_nexthop_key key;
1550 struct mlxsw_sp_nexthop *nh;
1551 struct mlxsw_sp_rif *r;
1553 if (mlxsw_sp->router.aborted)
1556 key.fib_nh = fib_nh;
1557 nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
1558 if (WARN_ON_ONCE(!nh))
1561 r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fib_nh->nh_dev);
1566 case FIB_EVENT_NH_ADD:
1567 mlxsw_sp_nexthop_rif_init(nh, r);
1568 mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
1570 case FIB_EVENT_NH_DEL:
1571 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
1572 mlxsw_sp_nexthop_rif_fini(nh);
1576 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
/* A RIF is going away: detach every nexthop that used it and refresh the
 * affected groups so traffic falls back to the kernel.
 */
1579 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
1580 struct mlxsw_sp_rif *r)
1582 struct mlxsw_sp_nexthop *nh, *tmp;
1584 list_for_each_entry_safe(nh, tmp, &r->nexthop_list, rif_list_node) {
1585 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
1586 mlxsw_sp_nexthop_rif_fini(nh);
1587 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
/* Create a nexthop group for a fib_info: allocate the group with its
 * trailing nexthop array (flexible-array style), initialize every
 * nexthop, insert the group into the group hash and program it into
 * hardware. Rolls back initialized nexthops on failure.
 */
1591 static struct mlxsw_sp_nexthop_group *
1592 mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
1594 struct mlxsw_sp_nexthop_group *nh_grp;
1595 struct mlxsw_sp_nexthop *nh;
1596 struct fib_nh *fib_nh;
1601 alloc_size = sizeof(*nh_grp) +
1602 fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
1603 nh_grp = kzalloc(alloc_size, GFP_KERNEL);
1605 return ERR_PTR(-ENOMEM);
1606 INIT_LIST_HEAD(&nh_grp->fib_list);
/* A group is a "gateway" group when its first nexthop is not
 * directly connected (scope is link).
 */
1607 nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK;
1608 nh_grp->count = fi->fib_nhs;
1609 nh_grp->key.fi = fi;
1610 for (i = 0; i < nh_grp->count; i++) {
1611 nh = &nh_grp->nexthops[i];
1612 fib_nh = &fi->fib_nh[i];
1613 err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh);
1615 goto err_nexthop_init;
1617 err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
1619 goto err_nexthop_group_insert;
1620 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
1623 err_nexthop_group_insert:
/* Unwind only the nexthops that were successfully initialized. */
1625 for (i--; i >= 0; i--) {
1626 nh = &nh_grp->nexthops[i];
1627 mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
1630 return ERR_PTR(err);
/* Destroy a nexthop group: remove it from the group hash, tear down all
 * nexthops, and refresh one last time so the adjacency area is released
 * (the WARN guards against a leaked adjacency index).
 */
1634 mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
1635 struct mlxsw_sp_nexthop_group *nh_grp)
1637 struct mlxsw_sp_nexthop *nh;
1640 mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
1641 for (i = 0; i < nh_grp->count; i++) {
1642 nh = &nh_grp->nexthops[i];
1643 mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
1645 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
1646 WARN_ON_ONCE(nh_grp->adj_index_valid);
/* Reference-counted lookup of a nexthop group keyed by fib_info: reuse
 * an existing group or create one, then attach the FIB entry to it.
 */
1650 static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
1651 struct mlxsw_sp_fib_entry *fib_entry,
1652 struct fib_info *fi)
1654 struct mlxsw_sp_nexthop_group_key key;
1655 struct mlxsw_sp_nexthop_group *nh_grp;
1658 nh_grp = mlxsw_sp_nexthop_group_lookup(mlxsw_sp, key);
1660 nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi);
1662 return PTR_ERR(nh_grp);
1664 list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
1665 fib_entry->nh_group = nh_grp;
/* Drop a FIB entry's reference on its nexthop group; destroy the group
 * when no FIB entry uses it any more.
 */
1669 static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
1670 struct mlxsw_sp_fib_entry *fib_entry)
1672 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
1674 list_del(&fib_entry->nexthop_group_node);
1675 if (!list_empty(&nh_grp->fib_list))
1677 mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
/* Decide whether a FIB entry can be offloaded: never for non-zero TOS;
 * remote entries need a valid adjacency index, local entries need a RIF.
 */
1681 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
1683 struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
1685 if (fib_entry->params.tos)
1688 switch (fib_entry->type) {
1689 case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
1690 return !!nh_group->adj_index_valid;
1691 case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
1692 return !!nh_group->nh_rif;
/* Mark a FIB entry as offloaded and bump the fib_info offload counter
 * (IPv4 only; IPv6 is not supported here).
 */
1698 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
1700 fib_entry->offloaded = true;
1702 switch (fib_entry->fib_node->vr->proto) {
1703 case MLXSW_SP_L3_PROTO_IPV4:
1704 fib_info_offload_inc(fib_entry->nh_group->key.fi);
1706 case MLXSW_SP_L3_PROTO_IPV6:
/* Reverse of mlxsw_sp_fib_entry_offload_set(). */
1712 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
1714 switch (fib_entry->fib_node->vr->proto) {
1715 case MLXSW_SP_L3_PROTO_IPV4:
1716 fib_info_offload_dec(fib_entry->nh_group->key.fi);
1718 case MLXSW_SP_L3_PROTO_IPV6:
1722 fib_entry->offloaded = false;
/* Keep the entry's offload flag consistent after a hardware operation:
 * clear it on delete, and on write set/clear it according to whether the
 * entry should currently be offloaded.
 */
1726 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
1727 enum mlxsw_reg_ralue_op op, int err)
1730 case MLXSW_REG_RALUE_OP_WRITE_DELETE:
1731 if (!fib_entry->offloaded)
1733 return mlxsw_sp_fib_entry_offload_unset(fib_entry);
1734 case MLXSW_REG_RALUE_OP_WRITE_WRITE:
1737 if (mlxsw_sp_fib_entry_should_offload(fib_entry) &&
1738 !fib_entry->offloaded)
1739 mlxsw_sp_fib_entry_offload_set(fib_entry);
1740 else if (!mlxsw_sp_fib_entry_should_offload(fib_entry) &&
1741 fib_entry->offloaded)
1742 mlxsw_sp_fib_entry_offload_unset(fib_entry);
/* Program a remote (gateway) IPv4 entry via RALUE: use the group's
 * adjacency index when offloadable, otherwise trap to the kernel.
 */
1749 static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
1750 struct mlxsw_sp_fib_entry *fib_entry,
1751 enum mlxsw_reg_ralue_op op)
1753 char ralue_pl[MLXSW_REG_RALUE_LEN];
1754 u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
1755 struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr;
1756 enum mlxsw_reg_ralue_trap_action trap_action;
1758 u32 adjacency_index = 0;
1761 /* In case the nexthop group adjacency index is valid, use it
1762 * with provided ECMP size. Otherwise, setup trap and pass
1763 * traffic to kernel.
1765 if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
1766 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
1767 adjacency_index = fib_entry->nh_group->adj_index;
1768 ecmp_size = fib_entry->nh_group->ecmp_size;
1770 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
1771 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
1774 mlxsw_reg_ralue_pack4(ralue_pl,
1775 (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
1776 vr->id, fib_entry->fib_node->key.prefix_len,
1778 mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
1779 adjacency_index, ecmp_size);
1780 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
/* Program a local (directly connected) IPv4 entry via RALUE: forward via
 * the group's RIF when offloadable, otherwise trap to the kernel.
 */
1783 static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
1784 struct mlxsw_sp_fib_entry *fib_entry,
1785 enum mlxsw_reg_ralue_op op)
1787 struct mlxsw_sp_rif *r = fib_entry->nh_group->nh_rif;
1788 enum mlxsw_reg_ralue_trap_action trap_action;
1789 char ralue_pl[MLXSW_REG_RALUE_LEN];
1790 u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
1791 struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr;
1795 if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
1796 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
1799 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
1800 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
1803 mlxsw_reg_ralue_pack4(ralue_pl,
1804 (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
1805 vr->id, fib_entry->fib_node->key.prefix_len,
1807 mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, rif);
1808 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
/* Program an IPv4 entry that always traps packets to the CPU (ip2me),
 * used for local/broadcast routes.
 */
1811 static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp,
1812 struct mlxsw_sp_fib_entry *fib_entry,
1813 enum mlxsw_reg_ralue_op op)
1815 char ralue_pl[MLXSW_REG_RALUE_LEN];
1816 u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
1817 struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr;
1819 mlxsw_reg_ralue_pack4(ralue_pl,
1820 (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
1821 vr->id, fib_entry->fib_node->key.prefix_len,
1823 mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
1824 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
/* Dispatch an IPv4 FIB operation to the handler for the entry's type. */
1827 static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp,
1828 struct mlxsw_sp_fib_entry *fib_entry,
1829 enum mlxsw_reg_ralue_op op)
1831 switch (fib_entry->type) {
1832 case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
1833 return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op);
1834 case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
1835 return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
1836 case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
1837 return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op);
/* Perform a FIB operation for the entry's protocol (IPv4 only here) and
 * refresh its offload indication afterwards.
 */
1842 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
1843 struct mlxsw_sp_fib_entry *fib_entry,
1844 enum mlxsw_reg_ralue_op op)
1848 switch (fib_entry->fib_node->vr->proto) {
1849 case MLXSW_SP_L3_PROTO_IPV4:
1850 err = mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op);
1852 case MLXSW_SP_L3_PROTO_IPV6:
1855 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
/* Write (add/overwrite) a FIB entry in hardware. */
1859 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1860 struct mlxsw_sp_fib_entry *fib_entry)
1862 return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
1863 MLXSW_REG_RALUE_OP_WRITE_WRITE);
/* Delete a FIB entry from hardware. */
1866 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
1867 struct mlxsw_sp_fib_entry *fib_entry)
1869 return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
1870 MLXSW_REG_RALUE_OP_WRITE_DELETE);
/* Classify a notified IPv4 route: local/broadcast routes become TRAP,
 * unicast routes are LOCAL when directly connected or REMOTE when
 * reached via a gateway (nexthop scope is link).
 */
1874 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
1875 const struct fib_entry_notifier_info *fen_info,
1876 struct mlxsw_sp_fib_entry *fib_entry)
1878 struct fib_info *fi = fen_info->fi;
1880 if (fen_info->type == RTN_LOCAL || fen_info->type == RTN_BROADCAST) {
1881 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1884 if (fen_info->type != RTN_UNICAST)
1886 if (fi->fib_nh->nh_scope != RT_SCOPE_LINK)
1887 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
1889 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
/* Allocate and initialize a FIB entry for a notified IPv4 route: set its
 * type, take a nexthop group reference, and record the route parameters
 * used for matching (tb_id/type/tos/prio).
 */
1893 static struct mlxsw_sp_fib_entry *
1894 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
1895 struct mlxsw_sp_fib_node *fib_node,
1896 const struct fib_entry_notifier_info *fen_info)
1898 struct mlxsw_sp_fib_entry *fib_entry;
1901 fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL);
1904 goto err_fib_entry_alloc;
1907 err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
1909 goto err_fib4_entry_type_set;
1911 err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fen_info->fi);
1913 goto err_nexthop_group_get;
1915 fib_entry->params.prio = fen_info->fi->fib_priority;
1916 fib_entry->params.tb_id = fen_info->tb_id;
1917 fib_entry->params.type = fen_info->type;
1918 fib_entry->params.tos = fen_info->tos;
1920 fib_entry->fib_node = fib_node;
1924 err_nexthop_group_get:
1925 err_fib4_entry_type_set:
1927 err_fib_entry_alloc:
1928 return ERR_PTR(err);
/* Free a FIB entry and release its nexthop group reference. */
1931 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1932 struct mlxsw_sp_fib_entry *fib_entry)
1934 mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
/* Forward declaration: defined below, needed by the lookup helper. */
1938 static struct mlxsw_sp_fib_node *
1939 mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
1940 const struct fib_entry_notifier_info *fen_info);
/* Find the FIB entry matching a notification: locate the FIB node for
 * the prefix, then search its entry list for matching tb_id/tos/type
 * and fib_info.
 */
1942 static struct mlxsw_sp_fib_entry *
1943 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
1944 const struct fib_entry_notifier_info *fen_info)
1946 struct mlxsw_sp_fib_entry *fib_entry;
1947 struct mlxsw_sp_fib_node *fib_node;
1949 fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
1950 if (IS_ERR(fib_node))
1953 list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
1954 if (fib_entry->params.tb_id == fen_info->tb_id &&
1955 fib_entry->params.tos == fen_info->tos &&
1956 fib_entry->params.type == fen_info->type &&
1957 fib_entry->nh_group->key.fi == fen_info->fi) {
/* rhashtable parameters for FIB nodes, keyed by prefix + prefix_len. */
1965 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
1966 .key_offset = offsetof(struct mlxsw_sp_fib_node, key),
1967 .head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
1968 .key_len = sizeof(struct mlxsw_sp_fib_key),
1969 .automatic_shrinking = true,
/* Insert a FIB node into the FIB's hash table. */
1972 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
1973 struct mlxsw_sp_fib_node *fib_node)
1975 return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
1976 mlxsw_sp_fib_ht_params);
/* Remove a FIB node from the FIB's hash table. */
1979 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
1980 struct mlxsw_sp_fib_node *fib_node)
1982 rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
1983 mlxsw_sp_fib_ht_params);
/* Look up a FIB node by prefix; the key is zeroed first so padding does
 * not perturb the hash.
 */
1986 static struct mlxsw_sp_fib_node *
1987 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1988 size_t addr_len, unsigned char prefix_len)
1990 struct mlxsw_sp_fib_key key;
1992 memset(&key, 0, sizeof(key));
1993 memcpy(key.addr, addr, addr_len);
1994 key.prefix_len = prefix_len;
1995 return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
/* Allocate a FIB node for a prefix, link it into the VR's node list and
 * insert it into the FIB hash.
 */
1998 static struct mlxsw_sp_fib_node *
1999 mlxsw_sp_fib_node_create(struct mlxsw_sp_vr *vr, const void *addr,
2000 size_t addr_len, unsigned char prefix_len)
2002 struct mlxsw_sp_fib_node *fib_node;
2004 fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
2008 INIT_LIST_HEAD(&fib_node->entry_list);
2009 list_add(&fib_node->list, &vr->fib->node_list);
2010 memcpy(fib_node->key.addr, addr, addr_len);
2011 fib_node->key.prefix_len = prefix_len;
2012 mlxsw_sp_fib_node_insert(vr->fib, fib_node);
/* Unlink and free a FIB node; it must no longer hold any entries. */
2018 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
2020 mlxsw_sp_fib_node_remove(fib_node->vr->fib, fib_node);
2021 list_del(&fib_node->list);
2022 WARN_ON(!list_empty(&fib_node->entry_list));
/* True when the entry is the first in its node's list — i.e. the one
 * currently represented in hardware for this prefix.
 */
2027 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
2028 const struct mlxsw_sp_fib_entry *fib_entry)
2030 return list_first_entry(&fib_node->entry_list,
2031 struct mlxsw_sp_fib_entry, list) == fib_entry;
/* Take a reference on this node's prefix length; mark the length as used
 * in the FIB's prefix-usage bitmap on the first reference.
 */
2034 static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node)
2036 unsigned char prefix_len = fib_node->key.prefix_len;
2037 struct mlxsw_sp_fib *fib = fib_node->vr->fib;
2039 if (fib->prefix_ref_count[prefix_len]++ == 0)
2040 mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
/* Drop a prefix-length reference; clear the usage bit on the last one. */
2043 static void mlxsw_sp_fib_node_prefix_dec(struct mlxsw_sp_fib_node *fib_node)
2045 unsigned char prefix_len = fib_node->key.prefix_len;
2046 struct mlxsw_sp_fib *fib = fib_node->vr->fib;
2048 if (--fib->prefix_ref_count[prefix_len] == 0)
2049 mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
/* Get (lookup-or-create) the FIB node for a notified IPv4 prefix,
 * taking a reference on the owning virtual router. Releases the VR
 * reference on node-creation failure.
 */
2052 static struct mlxsw_sp_fib_node *
2053 mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
2054 const struct fib_entry_notifier_info *fen_info)
2056 struct mlxsw_sp_fib_node *fib_node;
2057 struct mlxsw_sp_vr *vr;
2060 vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->dst_len, fen_info->tb_id,
2061 MLXSW_SP_L3_PROTO_IPV4);
2063 return ERR_CAST(vr);
2065 fib_node = mlxsw_sp_fib_node_lookup(vr->fib, &fen_info->dst,
2066 sizeof(fen_info->dst),
2071 fib_node = mlxsw_sp_fib_node_create(vr, &fen_info->dst,
2072 sizeof(fen_info->dst),
2076 goto err_fib_node_create;
2081 err_fib_node_create:
2082 mlxsw_sp_vr_put(mlxsw_sp, vr);
2083 return ERR_PTR(err);
/* Release a FIB node reference: destroy the node when its entry list is
 * empty, then drop the VR reference taken in mlxsw_sp_fib4_node_get().
 */
2086 static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp,
2087 struct mlxsw_sp_fib_node *fib_node)
2089 struct mlxsw_sp_vr *vr = fib_node->vr;
2091 if (!list_empty(&fib_node->entry_list))
2093 mlxsw_sp_fib_node_destroy(fib_node);
2094 mlxsw_sp_vr_put(mlxsw_sp, vr);
/* Find the insertion anchor within a node's sorted entry list: entries
 * are ordered by tb_id, then tos, then priority.
 */
2097 static struct mlxsw_sp_fib_entry *
2098 mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
2099 const struct mlxsw_sp_fib_entry_params *params)
2101 struct mlxsw_sp_fib_entry *fib_entry;
2103 list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
2104 if (fib_entry->params.tb_id > params->tb_id)
2106 if (fib_entry->params.tb_id != params->tb_id)
2108 if (fib_entry->params.tos > params->tos)
2110 if (fib_entry->params.prio >= params->prio ||
2111 fib_entry->params.tos < params->tos)
/* Append a new entry after the last entry that shares the same
 * tb_id/tos/prio (the "sibling" run starting at fib_entry).
 */
2118 static int mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib_entry *fib_entry,
2119 struct mlxsw_sp_fib_entry *new_entry)
2121 struct mlxsw_sp_fib_node *fib_node;
2123 if (WARN_ON(!fib_entry))
2126 fib_node = fib_entry->fib_node;
2127 list_for_each_entry_from(fib_entry, &fib_node->entry_list, list) {
2128 if (fib_entry->params.tb_id != new_entry->params.tb_id ||
2129 fib_entry->params.tos != new_entry->params.tos ||
2130 fib_entry->params.prio != new_entry->params.prio)
2134 list_add_tail(&new_entry->list, &fib_entry->list);
/* Insert a new entry into the node's sorted list, honouring replace
 * (insert before the entry to be replaced) and append semantics.
 */
2139 mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib_node *fib_node,
2140 struct mlxsw_sp_fib_entry *new_entry,
2141 bool replace, bool append)
2143 struct mlxsw_sp_fib_entry *fib_entry;
2145 fib_entry = mlxsw_sp_fib4_node_entry_find(fib_node, &new_entry->params);
2148 return mlxsw_sp_fib4_node_list_append(fib_entry, new_entry);
2149 if (replace && WARN_ON(!fib_entry))
2152 /* Insert new entry before replaced one, so that we can later
2153 * remove the second.
2156 list_add_tail(&new_entry->list, &fib_entry->list);
2158 struct mlxsw_sp_fib_entry *last;
/* No anchor found: place by tb_id order, falling back to list head. */
2160 list_for_each_entry(last, &fib_node->entry_list, list) {
2161 if (new_entry->params.tb_id > last->params.tb_id)
2167 list_add(&new_entry->list, &fib_entry->list);
2169 list_add(&new_entry->list, &fib_node->entry_list);
/* Remove an entry from its node's list. */
2176 mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib_entry *fib_entry)
2178 list_del(&fib_entry->list);
/* Program an entry into hardware, but only when it is the first (i.e.
 * the active) entry for its prefix; otherwise hardware is untouched.
 */
2182 mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp,
2183 const struct mlxsw_sp_fib_node *fib_node,
2184 struct mlxsw_sp_fib_entry *fib_entry)
2186 if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
2189 /* To prevent packet loss, overwrite the previously offloaded
2192 if (!list_is_singular(&fib_node->entry_list)) {
2193 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
2194 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
2196 mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
2199 return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
/* Remove an entry from hardware; if it was the active entry, promote the
 * next entry for the prefix by overwriting it in place.
 */
2203 mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp,
2204 const struct mlxsw_sp_fib_node *fib_node,
2205 struct mlxsw_sp_fib_entry *fib_entry)
2207 if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
2210 /* Promote the next entry by overwriting the deleted entry */
2211 if (!list_is_singular(&fib_node->entry_list)) {
2212 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
2213 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
2215 mlxsw_sp_fib_entry_update(mlxsw_sp, n);
2216 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
2220 mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
/* Link an entry to its node: insert into the sorted list, program
 * hardware, and account the prefix length. Unwinds the list insert if
 * the hardware add fails.
 */
2223 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
2224 struct mlxsw_sp_fib_entry *fib_entry,
2225 bool replace, bool append)
2227 struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
2230 err = mlxsw_sp_fib4_node_list_insert(fib_node, fib_entry, replace,
2235 err = mlxsw_sp_fib4_node_entry_add(mlxsw_sp, fib_node, fib_entry);
2237 goto err_fib4_node_entry_add;
2239 mlxsw_sp_fib_node_prefix_inc(fib_node);
2243 err_fib4_node_entry_add:
2244 mlxsw_sp_fib4_node_list_remove(fib_entry);
/* Reverse of mlxsw_sp_fib4_node_entry_link(). */
2249 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
2250 struct mlxsw_sp_fib_entry *fib_entry)
2252 struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
2254 mlxsw_sp_fib_node_prefix_dec(fib_node);
2255 mlxsw_sp_fib4_node_entry_del(mlxsw_sp, fib_node, fib_entry);
2256 mlxsw_sp_fib4_node_list_remove(fib_entry);
/* Complete a replace operation: the new entry was inserted before the
 * one being replaced, so unlink and destroy its successor.
 */
2259 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
2260 struct mlxsw_sp_fib_entry *fib_entry,
2263 struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
2264 struct mlxsw_sp_fib_entry *replaced;
2269 /* We inserted the new entry before replaced one */
2270 replaced = list_next_entry(fib_entry, list);
2272 mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
2273 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
2274 mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
/* Top-level handler for an IPv4 route add/replace/append notification:
 * get the FIB node, create the entry, link it, and finish a replace by
 * removing the superseded entry. No-op once the router has aborted.
 */
2278 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
2279 const struct fib_entry_notifier_info *fen_info,
2280 bool replace, bool append)
2282 struct mlxsw_sp_fib_entry *fib_entry;
2283 struct mlxsw_sp_fib_node *fib_node;
2286 if (mlxsw_sp->router.aborted)
2289 fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
2290 if (IS_ERR(fib_node)) {
2291 dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
2292 return PTR_ERR(fib_node);
2295 fib_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
2296 if (IS_ERR(fib_entry)) {
2297 dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
2298 err = PTR_ERR(fib_entry);
2299 goto err_fib4_entry_create;
2302 err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib_entry, replace,
2305 dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
2306 goto err_fib4_node_entry_link;
2309 mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib_entry, replace);
2313 err_fib4_node_entry_link:
2314 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
2315 err_fib4_entry_create:
2316 mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
/* Top-level handler for an IPv4 route delete notification: look up the
 * entry, unlink it from its node, free it and drop the node reference.
 */
2320 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
2321 struct fib_entry_notifier_info *fen_info)
2323 struct mlxsw_sp_fib_entry *fib_entry;
2324 struct mlxsw_sp_fib_node *fib_node;
2326 if (mlxsw_sp->router.aborted)
2329 fib_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
2330 if (WARN_ON(!fib_entry))
2332 fib_node = fib_entry->fib_node;
2334 mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
2335 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
2336 mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
/* Install a catch-all configuration used after FIB abort: create a
 * minimal LPM tree (RALTA/RALST), bind it to VR 0 (RALTB), and add a
 * default entry that traps all packets to the CPU (RALUE ip2me).
 */
2339 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
2341 char ralta_pl[MLXSW_REG_RALTA_LEN];
2342 char ralst_pl[MLXSW_REG_RALST_LEN];
2343 char raltb_pl[MLXSW_REG_RALTB_LEN];
2344 char ralue_pl[MLXSW_REG_RALUE_LEN];
2347 mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4,
2348 MLXSW_SP_LPM_TREE_MIN);
2349 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
2353 mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN);
2354 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
2358 mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4,
2359 MLXSW_SP_LPM_TREE_MIN);
2360 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
2364 mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4,
2365 MLXSW_REG_RALUE_OP_WRITE_WRITE, 0, 0, 0);
2366 mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
2367 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
/* Flush all entries of a FIB node. Each iteration may free the node
 * itself (via the final node_put), so detect the last element before
 * touching it and break out instead of advancing into freed memory.
 */
2370 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
2371 struct mlxsw_sp_fib_node *fib_node)
2373 struct mlxsw_sp_fib_entry *fib_entry, *tmp;
2375 list_for_each_entry_safe(fib_entry, tmp, &fib_node->entry_list, list) {
2376 bool do_break = &tmp->list == &fib_node->entry_list;
2378 mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
2379 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
2380 mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
2381 /* Break when entry list is empty and node was freed.
2382 * Otherwise, we'll access freed memory in the next
/* Dispatch node flush by protocol (IPv4 only; IPv6 unsupported). */
2390 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
2391 struct mlxsw_sp_fib_node *fib_node)
2393 switch (fib_node->vr->proto) {
2394 case MLXSW_SP_L3_PROTO_IPV4:
2395 mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
2397 case MLXSW_SP_L3_PROTO_IPV6:
/* Flush every FIB node in every virtual router; uses the same freed-
 * memory guard as the node flush since flushing may free the VR's list.
 */
2403 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
2405 struct mlxsw_sp_fib_node *fib_node, *tmp;
2406 struct mlxsw_sp_vr *vr;
2409 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
2410 vr = &mlxsw_sp->router.vrs[i];
2415 list_for_each_entry_safe(fib_node, tmp, &vr->fib->node_list,
2417 bool do_break = &tmp->list == &vr->fib->node_list;
2419 mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
/* Enter FIB-abort mode: stop offloading, flush the hardware tables and
 * install the trap-everything configuration. Idempotent.
 */
2426 static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
2430 if (mlxsw_sp->router.aborted)
2432 dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
2433 mlxsw_sp_router_fib_flush(mlxsw_sp);
2434 mlxsw_sp->router.aborted = true;
2435 err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
2437 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
/* Disable a router interface in hardware: read its RITR record, clear
 * the enable bit and write it back.
 */
2440 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
2442 char ritr_pl[MLXSW_REG_RITR_LEN];
2445 mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
2446 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
2447 if (WARN_ON_ONCE(err))
2450 mlxsw_reg_ritr_enable_set(ritr_pl, false);
2451 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
/* Synchronize state when a RIF disappears: disable it in hardware, then
 * detach dependent nexthops and neighbours.
 */
2454 void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2455 struct mlxsw_sp_rif *r)
2457 mlxsw_sp_router_rif_disable(mlxsw_sp, r->rif);
2458 mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, r);
2459 mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, r);
/* Low-level router init: allocate the RIF pointer array sized by the
 * device's MAX_RIFS resource and enable routing via RGCR. Frees the
 * array on register-write failure.
 */
2462 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
2464 char rgcr_pl[MLXSW_REG_RGCR_LEN];
2468 if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
2471 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
2472 mlxsw_sp->rifs = kcalloc(max_rifs, sizeof(struct mlxsw_sp_rif *),
2474 if (!mlxsw_sp->rifs)
2477 mlxsw_reg_rgcr_pack(rgcr_pl, true);
2478 mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
2479 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
2486 kfree(mlxsw_sp->rifs);
/* Low-level router teardown: disable routing via RGCR and free the RIF
 * array, warning on any RIF that was not cleaned up.
 */
2490 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
2492 char rgcr_pl[MLXSW_REG_RGCR_LEN];
2495 mlxsw_reg_rgcr_pack(rgcr_pl, false);
2496 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
2498 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
2499 WARN_ON_ONCE(mlxsw_sp->rifs[i]);
2501 kfree(mlxsw_sp->rifs);
/* Deferred-work context for a FIB notification: the notifier runs in
 * atomic context, so the event payload is copied here and processed
 * later from process context.
 */
2504 struct mlxsw_sp_fib_event_work {
2505 struct work_struct work;
2507 struct fib_entry_notifier_info fen_info;
2508 struct fib_nh_notifier_info fnh_info;
2510 struct mlxsw_sp *mlxsw_sp;
2511 unsigned long event;
/* Process a queued FIB notification: add/del routes, handle nexthop
 * events, and abort on rule changes or add failures. Drops the
 * fib_info reference taken when the work was queued.
 */
2514 static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
2516 struct mlxsw_sp_fib_event_work *fib_work =
2517 container_of(work, struct mlxsw_sp_fib_event_work, work);
2518 struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
2519 bool replace, append;
2522 /* Protect internal structures from changes */
2524 switch (fib_work->event) {
2525 case FIB_EVENT_ENTRY_REPLACE: /* fall through */
2526 case FIB_EVENT_ENTRY_APPEND: /* fall through */
2527 case FIB_EVENT_ENTRY_ADD:
2528 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
2529 append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
2530 err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
2533 mlxsw_sp_router_fib4_abort(mlxsw_sp);
2534 fib_info_put(fib_work->fen_info.fi);
2536 case FIB_EVENT_ENTRY_DEL:
2537 mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
2538 fib_info_put(fib_work->fen_info.fi);
2540 case FIB_EVENT_RULE_ADD: /* fall through */
/* FIB rules are not offloaded; any rule change triggers abort. */
2541 case FIB_EVENT_RULE_DEL:
2542 mlxsw_sp_router_fib4_abort(mlxsw_sp);
2544 case FIB_EVENT_NH_ADD: /* fall through */
2545 case FIB_EVENT_NH_DEL:
2546 mlxsw_sp_nexthop_event(mlxsw_sp, fib_work->event,
2547 fib_work->fnh_info.fib_nh);
2548 fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
2555 /* Called with rcu_read_lock() */
2556 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
2557 unsigned long event, void *ptr)
2559 struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
2560 struct mlxsw_sp_fib_event_work *fib_work;
2561 struct fib_notifier_info *info = ptr;
2563 if (!net_eq(info->net, &init_net))
2566 fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
2567 if (WARN_ON(!fib_work))
2570 INIT_WORK(&fib_work->work, mlxsw_sp_router_fib_event_work);
2571 fib_work->mlxsw_sp = mlxsw_sp;
2572 fib_work->event = event;
2575 case FIB_EVENT_ENTRY_REPLACE: /* fall through */
2576 case FIB_EVENT_ENTRY_APPEND: /* fall through */
2577 case FIB_EVENT_ENTRY_ADD: /* fall through */
2578 case FIB_EVENT_ENTRY_DEL:
2579 memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
2580 /* Take referece on fib_info to prevent it from being
2581 * freed while work is queued. Release it afterwards.
2583 fib_info_hold(fib_work->fen_info.fi);
2585 case FIB_EVENT_NH_ADD: /* fall through */
2586 case FIB_EVENT_NH_DEL:
2587 memcpy(&fib_work->fnh_info, ptr, sizeof(fib_work->fnh_info));
2588 fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
2592 mlxsw_core_schedule_work(&fib_work->work);
2597 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
2599 struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
2601 /* Flush pending FIB notifications and then flush the device's
2602 * table before requesting another dump. The FIB notification
2603 * block is unregistered, so no need to take RTNL.
2605 mlxsw_core_flush_owq();
2606 mlxsw_sp_router_fib_flush(mlxsw_sp);
2609 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
2613 INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list);
2614 err = __mlxsw_sp_router_init(mlxsw_sp);
2618 err = rhashtable_init(&mlxsw_sp->router.nexthop_ht,
2619 &mlxsw_sp_nexthop_ht_params);
2621 goto err_nexthop_ht_init;
2623 err = rhashtable_init(&mlxsw_sp->router.nexthop_group_ht,
2624 &mlxsw_sp_nexthop_group_ht_params);
2626 goto err_nexthop_group_ht_init;
2628 mlxsw_sp_lpm_init(mlxsw_sp);
2629 err = mlxsw_sp_vrs_init(mlxsw_sp);
2633 err = mlxsw_sp_neigh_init(mlxsw_sp);
2635 goto err_neigh_init;
2637 mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
2638 err = register_fib_notifier(&mlxsw_sp->fib_nb,
2639 mlxsw_sp_router_fib_dump_flush);
2641 goto err_register_fib_notifier;
2645 err_register_fib_notifier:
2646 mlxsw_sp_neigh_fini(mlxsw_sp);
2648 mlxsw_sp_vrs_fini(mlxsw_sp);
2650 rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
2651 err_nexthop_group_ht_init:
2652 rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
2653 err_nexthop_ht_init:
2654 __mlxsw_sp_router_fini(mlxsw_sp);
2658 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
2660 unregister_fib_notifier(&mlxsw_sp->fib_nb);
2661 mlxsw_sp_neigh_fini(mlxsw_sp);
2662 mlxsw_sp_vrs_fini(mlxsw_sp);
2663 rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
2664 rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
2665 __mlxsw_sp_router_fini(mlxsw_sp);