/*
 * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
 * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
 * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
 * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/rhashtable.h>
#include <linux/bitops.h>
#include <linux/in6.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/fib_rules.h>
#include <net/l3mdev.h>

#include "spectrum.h"
#include "core.h"
#include "reg.h"
#include "spectrum_cnt.h"
#include "spectrum_dpipe.h"
#include "spectrum_router.h"

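/* A router interface (RIF) binds a netdev to a virtual router in
 * hardware. The fields below mirror the netdev's state (MAC address,
 * MTU) and track the nexthops and neighbour entries that egress
 * through this interface, along with optional per-direction packet
 * counters.
 */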
struct mlxsw_sp_rif {
        struct list_head nexthop_list;
        struct list_head neigh_list;
        struct net_device *dev;
        struct mlxsw_sp_fid *f;
        unsigned char addr[ETH_ALEN];
        int mtu;
        u16 rif_index;
        u16 vr_id;
        unsigned int counter_ingress;
        bool counter_ingress_valid;
        unsigned int counter_egress;
        bool counter_egress_valid;
};

static unsigned int *
mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
                           enum mlxsw_sp_rif_counter_dir dir)
{
        switch (dir) {
        case MLXSW_SP_RIF_COUNTER_EGRESS:
                return &rif->counter_egress;
        case MLXSW_SP_RIF_COUNTER_INGRESS:
                return &rif->counter_ingress;
        }
        return NULL;
}

static bool
mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
                               enum mlxsw_sp_rif_counter_dir dir)
{
        switch (dir) {
        case MLXSW_SP_RIF_COUNTER_EGRESS:
                return rif->counter_egress_valid;
        case MLXSW_SP_RIF_COUNTER_INGRESS:
                return rif->counter_ingress_valid;
        }
        return false;
}

static void
mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
                               enum mlxsw_sp_rif_counter_dir dir,
                               bool valid)
{
        switch (dir) {
        case MLXSW_SP_RIF_COUNTER_EGRESS:
                rif->counter_egress_valid = valid;
                break;
        case MLXSW_SP_RIF_COUNTER_INGRESS:
                rif->counter_ingress_valid = valid;
                break;
        }
}

static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
                                     unsigned int counter_index, bool enable,
                                     enum mlxsw_sp_rif_counter_dir dir)
{
        char ritr_pl[MLXSW_REG_RITR_LEN];
        bool is_egress = false;
        int err;

        if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
                is_egress = true;
        mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
        err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
        if (err)
                return err;

        mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
                                    is_egress);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}

int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
                                   struct mlxsw_sp_rif *rif,
                                   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
{
        char ricnt_pl[MLXSW_REG_RICNT_LEN];
        unsigned int *p_counter_index;
        bool valid;
        int err;

        valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
        if (!valid)
                return -EINVAL;

        p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
        if (!p_counter_index)
                return -EINVAL;
        mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
                             MLXSW_REG_RICNT_OPCODE_NOP);
        err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
        if (err)
                return err;
        *cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
        return 0;
}

static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
                                      unsigned int counter_index)
{
        char ricnt_pl[MLXSW_REG_RICNT_LEN];

        mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
                             MLXSW_REG_RICNT_OPCODE_CLEAR);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
}

int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
                               struct mlxsw_sp_rif *rif,
                               enum mlxsw_sp_rif_counter_dir dir)
{
        unsigned int *p_counter_index;
        int err;

        p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
        if (!p_counter_index)
                return -EINVAL;
        err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
                                     p_counter_index);
        if (err)
                return err;

        err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
        if (err)
                goto err_counter_clear;

        err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
                                        *p_counter_index, true, dir);
        if (err)
                goto err_counter_edit;
        mlxsw_sp_rif_counter_valid_set(rif, dir, true);
        return 0;

err_counter_edit:
err_counter_clear:
        mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
                              *p_counter_index);
        return err;
}
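
/* Illustrative usage sketch (not part of this file): a counter is
 * allocated per direction and later read with
 * mlxsw_sp_rif_counter_value_get():
 *
 *	err = mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif,
 *					 MLXSW_SP_RIF_COUNTER_EGRESS);
 *	if (!err)
 *		err = mlxsw_sp_rif_counter_value_get(mlxsw_sp, rif,
 *						     MLXSW_SP_RIF_COUNTER_EGRESS,
 *						     &cnt);
 */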

void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
                               struct mlxsw_sp_rif *rif,
                               enum mlxsw_sp_rif_counter_dir dir)
{
        unsigned int *p_counter_index;

        p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
        if (WARN_ON(!p_counter_index))
                return;
        mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
                                  *p_counter_index, false, dir);
        mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
                              *p_counter_index);
        mlxsw_sp_rif_counter_valid_set(rif, dir, false);
}

static struct mlxsw_sp_rif *
mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
                         const struct net_device *dev);

#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
        for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
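
/* The prefix usage is a bitmap with one bit per prefix length; e.g. a
 * virtual router holding 10.0.0.0/8 and 10.1.0.0/16 routes has bits 8
 * and 16 set. The LPM tree bound to the router must cover every used
 * length.
 */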

static bool
mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1,
                             struct mlxsw_sp_prefix_usage *prefix_usage2)
{
        unsigned char prefix;

        mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) {
                if (!test_bit(prefix, prefix_usage2->b))
                        return false;
        }
        return true;
}

static bool
mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
                         struct mlxsw_sp_prefix_usage *prefix_usage2)
{
        return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static bool
mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
{
        struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 }};

        return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
}

static void
mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
                          struct mlxsw_sp_prefix_usage *prefix_usage2)
{
        memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static void
mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
                          unsigned char prefix_len)
{
        set_bit(prefix_len, prefix_usage->b);
}

static void
mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
                            unsigned char prefix_len)
{
        clear_bit(prefix_len, prefix_usage->b);
}

struct mlxsw_sp_fib_key {
        unsigned char addr[sizeof(struct in6_addr)];
        unsigned char prefix_len;
};

enum mlxsw_sp_fib_entry_type {
        MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
        MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
        MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
};

struct mlxsw_sp_nexthop_group;

struct mlxsw_sp_fib_node {
        struct list_head entry_list;
        struct list_head list;
        struct rhash_head ht_node;
        struct mlxsw_sp_fib *fib;
        struct mlxsw_sp_fib_key key;
};

struct mlxsw_sp_fib_entry_params {
        u32 tb_id;
        u32 prio;
        u8 tos;
        u8 type;
};

struct mlxsw_sp_fib_entry {
        struct list_head list;
        struct mlxsw_sp_fib_node *fib_node;
        enum mlxsw_sp_fib_entry_type type;
        struct list_head nexthop_group_node;
        struct mlxsw_sp_nexthop_group *nh_group;
        struct mlxsw_sp_fib_entry_params params;
        bool offloaded;
};

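/* Each virtual router owns one FIB per L3 protocol. Nodes (prefixes)
 * live both in a hash table for lookup and in a list for ordered
 * traversal; prefix_ref_count feeds prefix_usage, which in turn
 * dictates the LPM tree the FIB must be bound to.
 */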
struct mlxsw_sp_fib {
        struct rhashtable ht;
        struct list_head node_list;
        struct mlxsw_sp_vr *vr;
        struct mlxsw_sp_lpm_tree *lpm_tree;
        unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
        struct mlxsw_sp_prefix_usage prefix_usage;
        enum mlxsw_sp_l3proto proto;
};

static const struct rhashtable_params mlxsw_sp_fib_ht_params;

static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp_vr *vr,
                                                enum mlxsw_sp_l3proto proto)
{
        struct mlxsw_sp_fib *fib;
        int err;

        fib = kzalloc(sizeof(*fib), GFP_KERNEL);
        if (!fib)
                return ERR_PTR(-ENOMEM);
        err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
        if (err)
                goto err_rhashtable_init;
        INIT_LIST_HEAD(&fib->node_list);
        fib->proto = proto;
        fib->vr = vr;
        return fib;

err_rhashtable_init:
        kfree(fib);
        return ERR_PTR(err);
}

static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
{
        WARN_ON(!list_empty(&fib->node_list));
        WARN_ON(fib->lpm_tree);
        rhashtable_destroy(&fib->ht);
        kfree(fib);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
{
        struct mlxsw_sp_lpm_tree *lpm_tree;
        int i;

        for (i = 0; i < mlxsw_sp->router.lpm.tree_count; i++) {
                lpm_tree = &mlxsw_sp->router.lpm.trees[i];
                if (lpm_tree->ref_count == 0)
                        return lpm_tree;
        }
        return NULL;
}

static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
                                   struct mlxsw_sp_lpm_tree *lpm_tree)
{
        char ralta_pl[MLXSW_REG_RALTA_LEN];

        mlxsw_reg_ralta_pack(ralta_pl, true,
                             (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
                             lpm_tree->id);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
                                  struct mlxsw_sp_lpm_tree *lpm_tree)
{
        char ralta_pl[MLXSW_REG_RALTA_LEN];

        mlxsw_reg_ralta_pack(ralta_pl, false,
                             (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
                             lpm_tree->id);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

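/* Program the tree structure via RALST: a linear chain of bins in
 * which each used prefix length points to the next shorter one as its
 * left child. The root bin is the longest used prefix length (the
 * last bit found by the first loop below), so lookups try longer
 * prefixes first.
 */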
static int
mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
                                  struct mlxsw_sp_prefix_usage *prefix_usage,
                                  struct mlxsw_sp_lpm_tree *lpm_tree)
{
        char ralst_pl[MLXSW_REG_RALST_LEN];
        u8 root_bin = 0;
        u8 prefix;
        u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;

        mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
                root_bin = prefix;

        mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
        mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
                if (prefix == 0)
                        continue;
                mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
                                         MLXSW_REG_RALST_BIN_NO_CHILD);
                last_prefix = prefix;
        }
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
                         struct mlxsw_sp_prefix_usage *prefix_usage,
                         enum mlxsw_sp_l3proto proto)
{
        struct mlxsw_sp_lpm_tree *lpm_tree;
        int err;

        lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
        if (!lpm_tree)
                return ERR_PTR(-EBUSY);
        lpm_tree->proto = proto;
        err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
        if (err)
                return ERR_PTR(err);

        err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
                                                lpm_tree);
        if (err)
                goto err_left_struct_set;
        memcpy(&lpm_tree->prefix_usage, prefix_usage,
               sizeof(lpm_tree->prefix_usage));
        return lpm_tree;

err_left_struct_set:
        mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
        return ERR_PTR(err);
}

static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
                                     struct mlxsw_sp_lpm_tree *lpm_tree)
{
        return mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}

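/* LPM trees are reference counted: mlxsw_sp_lpm_tree_get() reuses an
 * existing tree whose prefix usage matches exactly, or creates a new
 * one, and mlxsw_sp_lpm_tree_put() destroys the tree once the last
 * user is gone.
 */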
static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
                      struct mlxsw_sp_prefix_usage *prefix_usage,
                      enum mlxsw_sp_l3proto proto)
{
        struct mlxsw_sp_lpm_tree *lpm_tree;
        int i;

        for (i = 0; i < mlxsw_sp->router.lpm.tree_count; i++) {
                lpm_tree = &mlxsw_sp->router.lpm.trees[i];
                if (lpm_tree->ref_count != 0 &&
                    lpm_tree->proto == proto &&
                    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
                                             prefix_usage))
                        goto inc_ref_count;
        }
        lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage,
                                            proto);
        if (IS_ERR(lpm_tree))
                return lpm_tree;

inc_ref_count:
        lpm_tree->ref_count++;
        return lpm_tree;
}

static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
                                 struct mlxsw_sp_lpm_tree *lpm_tree)
{
        if (--lpm_tree->ref_count == 0)
                return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
        return 0;
}

#define MLXSW_SP_LPM_TREE_MIN 2 /* trees 0 and 1 are reserved */

static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
{
        struct mlxsw_sp_lpm_tree *lpm_tree;
        u64 max_trees;
        int i;

        if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
                return -EIO;

        max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
        mlxsw_sp->router.lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
        mlxsw_sp->router.lpm.trees = kcalloc(mlxsw_sp->router.lpm.tree_count,
                                             sizeof(struct mlxsw_sp_lpm_tree),
                                             GFP_KERNEL);
        if (!mlxsw_sp->router.lpm.trees)
                return -ENOMEM;

        for (i = 0; i < mlxsw_sp->router.lpm.tree_count; i++) {
                lpm_tree = &mlxsw_sp->router.lpm.trees[i];
                lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
        }

        return 0;
}

static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
{
        kfree(mlxsw_sp->router.lpm.trees);
}

static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
{
        return !!vr->fib4;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
{
        struct mlxsw_sp_vr *vr;
        int i;

        for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
                vr = &mlxsw_sp->router.vrs[i];
                if (!mlxsw_sp_vr_is_used(vr))
                        return vr;
        }
        return NULL;
}

static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
                                     const struct mlxsw_sp_fib *fib)
{
        char raltb_pl[MLXSW_REG_RALTB_LEN];

        mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
                             (enum mlxsw_reg_ralxx_protocol) fib->proto,
                             fib->lpm_tree->id);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
                                       const struct mlxsw_sp_fib *fib)
{
        char raltb_pl[MLXSW_REG_RALTB_LEN];

        /* Bind to tree 0 which is default */
        mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
                             (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
{
        /* For our purpose, squash main and local table into one */
        if (tb_id == RT_TABLE_LOCAL)
                tb_id = RT_TABLE_MAIN;
        return tb_id;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
                                            u32 tb_id)
{
        struct mlxsw_sp_vr *vr;
        int i;

        tb_id = mlxsw_sp_fix_tb_id(tb_id);

        for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
                vr = &mlxsw_sp->router.vrs[i];
                if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
                        return vr;
        }
        return NULL;
}

static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
                                            enum mlxsw_sp_l3proto proto)
{
        switch (proto) {
        case MLXSW_SP_L3_PROTO_IPV4:
                return vr->fib4;
        case MLXSW_SP_L3_PROTO_IPV6:
                BUG_ON(1);
        }
        return NULL;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
                                              u32 tb_id)
{
        struct mlxsw_sp_vr *vr;

        vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
        if (!vr)
                return ERR_PTR(-EBUSY);
        vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4);
        if (IS_ERR(vr->fib4))
                return ERR_CAST(vr->fib4);
        vr->tb_id = tb_id;
        return vr;
}

static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr)
{
        mlxsw_sp_fib_destroy(vr->fib4);
        vr->fib4 = NULL;
}

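/* Re-evaluate the LPM tree bound to a FIB when its prefix usage
 * changes. If a different tree is needed, the new tree is bound
 * before the old one is released, so lookups keep working during the
 * switch-over.
 */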
static int
mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib *fib,
                           struct mlxsw_sp_prefix_usage *req_prefix_usage)
{
        struct mlxsw_sp_lpm_tree *lpm_tree = fib->lpm_tree;
        struct mlxsw_sp_lpm_tree *new_tree;
        int err;

        if (mlxsw_sp_prefix_usage_eq(req_prefix_usage, &lpm_tree->prefix_usage))
                return 0;

        new_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage,
                                         fib->proto);
        if (IS_ERR(new_tree)) {
                /* We failed to get a tree according to the required
                 * prefix usage. However, the current tree might still be
                 * good for us if our requirement is a subset of the
                 * prefixes used in the tree.
                 */
                if (mlxsw_sp_prefix_usage_subset(req_prefix_usage,
                                                 &lpm_tree->prefix_usage))
                        return 0;
                return PTR_ERR(new_tree);
        }

        /* Prevent packet loss by overwriting existing binding */
        fib->lpm_tree = new_tree;
        err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib);
        if (err)
                goto err_tree_bind;
        mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);

        return 0;

err_tree_bind:
        fib->lpm_tree = lpm_tree;
        mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
        return err;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id)
{
        struct mlxsw_sp_vr *vr;

        tb_id = mlxsw_sp_fix_tb_id(tb_id);
        vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
        if (!vr)
                vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id);
        return vr;
}

static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr)
{
        if (!vr->rif_count && list_empty(&vr->fib4->node_list))
                mlxsw_sp_vr_destroy(vr);
}

static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
{
        struct mlxsw_sp_vr *vr;
        u64 max_vrs;
        int i;

        if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
                return -EIO;

        max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
        mlxsw_sp->router.vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
                                       GFP_KERNEL);
        if (!mlxsw_sp->router.vrs)
                return -ENOMEM;

        for (i = 0; i < max_vrs; i++) {
                vr = &mlxsw_sp->router.vrs[i];
                vr->id = i;
        }

        return 0;
}

static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);

static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
{
        /* At this stage we're guaranteed not to have new incoming
         * FIB notifications and the work queue is free from FIBs
         * sitting on top of mlxsw netdevs. However, we can still
         * have other FIBs queued. Flush the queue before flushing
         * the device's tables. No need for locks, as we're the only
         * writer.
         */
        mlxsw_core_flush_owq();
        mlxsw_sp_router_fib_flush(mlxsw_sp);
        kfree(mlxsw_sp->router.vrs);
}

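/* Neighbour tracking: each kernel neighbour that resolves via a mlxsw
 * netdev is mirrored by a mlxsw_sp_neigh_entry, keyed by the struct
 * neighbour pointer itself and linked both to its RIF and to the
 * nexthops that use it.
 */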
struct mlxsw_sp_neigh_key {
        struct neighbour *n;
};

struct mlxsw_sp_neigh_entry {
        struct list_head rif_list_node;
        struct rhash_head ht_node;
        struct mlxsw_sp_neigh_key key;
        u16 rif;
        bool connected;
        unsigned char ha[ETH_ALEN];
        struct list_head nexthop_list; /* list of nexthops using
                                        * this neigh entry
                                        */
        struct list_head nexthop_neighs_list_node;
};

static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
        .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
        .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
        .key_len = sizeof(struct mlxsw_sp_neigh_key),
};

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
                           u16 rif)
{
        struct mlxsw_sp_neigh_entry *neigh_entry;

        neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
        if (!neigh_entry)
                return NULL;

        neigh_entry->key.n = n;
        neigh_entry->rif = rif;
        INIT_LIST_HEAD(&neigh_entry->nexthop_list);

        return neigh_entry;
}

static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
        kfree(neigh_entry);
}

static int
mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
                            struct mlxsw_sp_neigh_entry *neigh_entry)
{
        return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht,
                                      &neigh_entry->ht_node,
                                      mlxsw_sp_neigh_ht_params);
}

static void
mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
                            struct mlxsw_sp_neigh_entry *neigh_entry)
{
        rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht,
                               &neigh_entry->ht_node,
                               mlxsw_sp_neigh_ht_params);
}

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
        struct mlxsw_sp_neigh_entry *neigh_entry;
        struct mlxsw_sp_rif *rif;
        int err;

        rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
        if (!rif)
                return ERR_PTR(-EINVAL);

        neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
        if (!neigh_entry)
                return ERR_PTR(-ENOMEM);

        err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
        if (err)
                goto err_neigh_entry_insert;

        list_add(&neigh_entry->rif_list_node, &rif->neigh_list);

        return neigh_entry;

err_neigh_entry_insert:
        mlxsw_sp_neigh_entry_free(neigh_entry);
        return ERR_PTR(err);
}

static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
                             struct mlxsw_sp_neigh_entry *neigh_entry)
{
        list_del(&neigh_entry->rif_list_node);
        mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
        mlxsw_sp_neigh_entry_free(neigh_entry);
}

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
        struct mlxsw_sp_neigh_key key;

        key.n = n;
        return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht,
                                      &key, mlxsw_sp_neigh_ht_params);
}

static void
mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
{
        unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);

        mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval);
}

static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
                                                   char *rauhtd_pl,
                                                   int ent_index)
{
        struct net_device *dev;
        struct neighbour *n;
        __be32 dipn;
        u32 dip;
        u16 rif;

        mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);

        if (!mlxsw_sp->rifs[rif]) {
                dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
                return;
        }

        dipn = htonl(dip);
        dev = mlxsw_sp->rifs[rif]->dev;
        n = neigh_lookup(&arp_tbl, &dipn, dev);
        if (!n) {
                netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
                           &dip);
                return;
        }

        netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
        neigh_event_send(n, NULL);
        neigh_release(n);
}

static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
                                                   char *rauhtd_pl,
                                                   int rec_index)
{
        u8 num_entries;
        int i;

        num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
                                                                rec_index);
        /* Hardware starts counting at 0, so add 1. */
        num_entries++;

        /* Each record consists of several neighbour entries. */
        for (i = 0; i < num_entries; i++) {
                int ent_index;

                ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
                mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
                                                       ent_index);
        }
}

static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
                                              char *rauhtd_pl, int rec_index)
{
        switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
        case MLXSW_REG_RAUHTD_TYPE_IPV4:
                mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
                                                       rec_index);
                break;
        case MLXSW_REG_RAUHTD_TYPE_IPV6:
                WARN_ON_ONCE(1);
                break;
        }
}

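/* A RAUHTD response that is not full marks the end of the dump: it
 * either holds fewer than the maximum number of records, or its last
 * IPv4 record has unused entry slots. The caller keeps querying for
 * as long as the response is full.
 */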
static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
{
        u8 num_rec, last_rec_index, num_entries;

        num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
        last_rec_index = num_rec - 1;

        if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
                return false;
        if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
            MLXSW_REG_RAUHTD_TYPE_IPV6)
                return true;

        num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
                                                                last_rec_index);
        if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
                return true;
        return false;
}

static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
{
        char *rauhtd_pl;
        u8 num_rec;
        int i, err;

        rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
        if (!rauhtd_pl)
                return -ENOMEM;

        /* Make sure the neighbour's netdev isn't removed in the
         * process.
         */
        rtnl_lock();
        do {
                mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4);
                err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
                                      rauhtd_pl);
                if (err) {
                        dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
                        break;
                }
                num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
                for (i = 0; i < num_rec; i++)
                        mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
                                                          i);
        } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
        rtnl_unlock();

        kfree(rauhtd_pl);
        return err;
}

static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
{
        struct mlxsw_sp_neigh_entry *neigh_entry;

        /* Take the RTNL mutex here to prevent the lists from changing. */
        rtnl_lock();
        list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
                            nexthop_neighs_list_node)
                /* If this neigh has nexthops, make the kernel think it
                 * is active regardless of the traffic.
                 */
                neigh_event_send(neigh_entry->key.n, NULL);
        rtnl_unlock();
}

static void
mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
{
        unsigned long interval = mlxsw_sp->router.neighs_update.interval;

        mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw,
                               msecs_to_jiffies(interval));
}

static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
{
        struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
                                                 router.neighs_update.dw.work);
        int err;

        err = mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp);
        if (err)
                dev_err(mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity\n");

        mlxsw_sp_router_neighs_update_nh(mlxsw_sp);

        mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp);
}

static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
{
        struct mlxsw_sp_neigh_entry *neigh_entry;
        struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
                                                 router.nexthop_probe_dw.work);

        /* Iterate over nexthop neighbours, find the unresolved ones and
         * send ARP probes for them. This solves the chicken-and-egg
         * problem where a nexthop is not offloaded until its neighbour
         * is resolved, but the neighbour is never resolved if traffic
         * flows in hardware via a different nexthop.
         *
         * Take the RTNL mutex here to prevent the lists from changing.
         */
        rtnl_lock();
        list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
                            nexthop_neighs_list_node)
                if (!neigh_entry->connected)
                        neigh_event_send(neigh_entry->key.n, NULL);
        rtnl_unlock();

        mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw,
                               MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
}

static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
                              struct mlxsw_sp_neigh_entry *neigh_entry,
                              bool removing);

static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
{
        return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
                        MLXSW_REG_RAUHT_OP_WRITE_DELETE;
}

static void
mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
                                struct mlxsw_sp_neigh_entry *neigh_entry,
                                enum mlxsw_reg_rauht_op op)
{
        struct neighbour *n = neigh_entry->key.n;
        u32 dip = ntohl(*((__be32 *) n->primary_key));
        char rauht_pl[MLXSW_REG_RAUHT_LEN];

        mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
                              dip);
        mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}

static void
mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
                            struct mlxsw_sp_neigh_entry *neigh_entry,
                            bool adding)
{
        if (!adding && !neigh_entry->connected)
                return;
        neigh_entry->connected = adding;
        if (neigh_entry->key.n->tbl == &arp_tbl)
                mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
                                                mlxsw_sp_rauht_op(adding));
        else
                WARN_ON_ONCE(1);
}

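/* NETEVENT_NEIGH_UPDATE notifications arrive in atomic context, so
 * the actual hardware programming is deferred to process context via
 * this work item, which holds a reference on the neighbour until it
 * has run.
 */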
struct mlxsw_sp_neigh_event_work {
        struct work_struct work;
        struct mlxsw_sp *mlxsw_sp;
        struct neighbour *n;
};

static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
{
        struct mlxsw_sp_neigh_event_work *neigh_work =
                container_of(work, struct mlxsw_sp_neigh_event_work, work);
        struct mlxsw_sp *mlxsw_sp = neigh_work->mlxsw_sp;
        struct mlxsw_sp_neigh_entry *neigh_entry;
        struct neighbour *n = neigh_work->n;
        unsigned char ha[ETH_ALEN];
        bool entry_connected;
        u8 nud_state, dead;

        /* If these parameters are changed after we release the lock,
         * then we are guaranteed to receive another event letting us
         * know about it.
         */
        read_lock_bh(&n->lock);
        memcpy(ha, n->ha, ETH_ALEN);
        nud_state = n->nud_state;
        dead = n->dead;
        read_unlock_bh(&n->lock);

        rtnl_lock();
        entry_connected = nud_state & NUD_VALID && !dead;
        neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
        if (!entry_connected && !neigh_entry)
                goto out;
        if (!neigh_entry) {
                neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
                if (IS_ERR(neigh_entry))
                        goto out;
        }

        memcpy(neigh_entry->ha, ha, ETH_ALEN);
        mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
        mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);

        if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
                mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);

out:
        rtnl_unlock();
        neigh_release(n);
        kfree(neigh_work);
}

int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
                                   unsigned long event, void *ptr)
{
        struct mlxsw_sp_neigh_event_work *neigh_work;
        struct mlxsw_sp_port *mlxsw_sp_port;
        struct mlxsw_sp *mlxsw_sp;
        unsigned long interval;
        struct neigh_parms *p;
        struct neighbour *n;

        switch (event) {
        case NETEVENT_DELAY_PROBE_TIME_UPDATE:
                p = ptr;

                /* We don't care about changes in the default table. */
                if (!p->dev || p->tbl != &arp_tbl)
                        return NOTIFY_DONE;

                /* We are in atomic context and can't take RTNL mutex,
                 * so use RCU variant to walk the device chain.
                 */
                mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
                if (!mlxsw_sp_port)
                        return NOTIFY_DONE;

                mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
                interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
                mlxsw_sp->router.neighs_update.interval = interval;

                mlxsw_sp_port_dev_put(mlxsw_sp_port);
                break;
        case NETEVENT_NEIGH_UPDATE:
                n = ptr;

                if (n->tbl != &arp_tbl)
                        return NOTIFY_DONE;

                mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
                if (!mlxsw_sp_port)
                        return NOTIFY_DONE;

                neigh_work = kzalloc(sizeof(*neigh_work), GFP_ATOMIC);
                if (!neigh_work) {
                        mlxsw_sp_port_dev_put(mlxsw_sp_port);
                        return NOTIFY_BAD;
                }

                INIT_WORK(&neigh_work->work, mlxsw_sp_router_neigh_event_work);
                neigh_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
                neigh_work->n = n;

                /* Take a reference to ensure the neighbour won't be
                 * destroyed until we drop it in the delayed work.
                 */
                neigh_clone(n);
                mlxsw_core_schedule_work(&neigh_work->work);
                mlxsw_sp_port_dev_put(mlxsw_sp_port);
                break;
        }

        return NOTIFY_DONE;
}

static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
{
        int err;

        err = rhashtable_init(&mlxsw_sp->router.neigh_ht,
                              &mlxsw_sp_neigh_ht_params);
        if (err)
                return err;

        /* Initialize the polling interval according to the default
         * table.
         */
        mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);

        /* Create the delayed works for neighbour activity update and
         * unresolved nexthop probing.
         */
        INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw,
                          mlxsw_sp_router_neighs_update_work);
        INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw,
                          mlxsw_sp_router_probe_unresolved_nexthops);
        mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0);
        mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0);
        return 0;
}

static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
{
        cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw);
        cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw);
        rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
}

static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp,
                                    const struct mlxsw_sp_rif *rif)
{
        char rauht_pl[MLXSW_REG_RAUHT_LEN];

        mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL,
                             rif->rif_index, rif->addr);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}

static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
                                         struct mlxsw_sp_rif *rif)
{
        struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;

        mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif);
        list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
                                 rif_list_node)
                mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
}

struct mlxsw_sp_nexthop_key {
        struct fib_nh *fib_nh;
};

struct mlxsw_sp_nexthop {
        struct list_head neigh_list_node; /* member of neigh entry list */
        struct list_head rif_list_node;
        struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
                                                * this belongs to
                                                */
        struct rhash_head ht_node;
        struct mlxsw_sp_nexthop_key key;
        struct mlxsw_sp_rif *rif;
        u8 should_offload:1, /* set indicates this neigh is connected and
                              * should be put to KVD linear area of this group.
                              */
           offloaded:1, /* set in case the neigh is actually put into
                         * KVD linear area of this group.
                         */
           update:1; /* set indicates that MAC of this neigh should be
                      * updated in HW
                      */
        struct mlxsw_sp_neigh_entry *neigh_entry;
};

struct mlxsw_sp_nexthop_group_key {
        struct fib_info *fi;
};

struct mlxsw_sp_nexthop_group {
        struct rhash_head ht_node;
        struct list_head fib_list; /* list of fib entries that use this group */
        struct mlxsw_sp_nexthop_group_key key;
        u8 adj_index_valid:1,
           gateway:1; /* routes using the group use a gateway */
        u32 adj_index;
        u16 ecmp_size;
        u16 count;
        struct mlxsw_sp_nexthop nexthops[0];
#define nh_rif  nexthops[0].rif
};

static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
        .key_offset = offsetof(struct mlxsw_sp_nexthop_group, key),
        .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
        .key_len = sizeof(struct mlxsw_sp_nexthop_group_key),
};

static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
                                         struct mlxsw_sp_nexthop_group *nh_grp)
{
        return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_group_ht,
                                      &nh_grp->ht_node,
                                      mlxsw_sp_nexthop_group_ht_params);
}

static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
                                          struct mlxsw_sp_nexthop_group *nh_grp)
{
        rhashtable_remove_fast(&mlxsw_sp->router.nexthop_group_ht,
                               &nh_grp->ht_node,
                               mlxsw_sp_nexthop_group_ht_params);
}

static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop_group_lookup(struct mlxsw_sp *mlxsw_sp,
                              struct mlxsw_sp_nexthop_group_key key)
{
        return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_group_ht, &key,
                                      mlxsw_sp_nexthop_group_ht_params);
}

static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
        .key_offset = offsetof(struct mlxsw_sp_nexthop, key),
        .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
        .key_len = sizeof(struct mlxsw_sp_nexthop_key),
};

static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
                                   struct mlxsw_sp_nexthop *nh)
{
        return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_ht,
                                      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
}

static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
                                    struct mlxsw_sp_nexthop *nh)
{
        rhashtable_remove_fast(&mlxsw_sp->router.nexthop_ht, &nh->ht_node,
                               mlxsw_sp_nexthop_ht_params);
}

static struct mlxsw_sp_nexthop *
mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
                        struct mlxsw_sp_nexthop_key key)
{
        return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_ht, &key,
                                      mlxsw_sp_nexthop_ht_params);
}

static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
                                             const struct mlxsw_sp_fib *fib,
                                             u32 adj_index, u16 ecmp_size,
                                             u32 new_adj_index,
                                             u16 new_ecmp_size)
{
        char raleu_pl[MLXSW_REG_RALEU_LEN];

        mlxsw_reg_raleu_pack(raleu_pl,
                             (enum mlxsw_reg_ralxx_protocol) fib->proto,
                             fib->vr->id, adj_index, ecmp_size, new_adj_index,
                             new_ecmp_size);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
}

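/* Update all routes using a nexthop group to point at its new
 * adjacency block. The RALEU register rewrites the adjacency index
 * and ECMP size for every matching route in a virtual router in one
 * operation, so the update below is issued once per FIB rather than
 * once per route.
 */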
static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
                                          struct mlxsw_sp_nexthop_group *nh_grp,
                                          u32 old_adj_index, u16 old_ecmp_size)
{
        struct mlxsw_sp_fib_entry *fib_entry;
        struct mlxsw_sp_fib *fib = NULL;
        int err;

        list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
                if (fib == fib_entry->fib_node->fib)
                        continue;
                fib = fib_entry->fib_node->fib;
                err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
                                                        old_adj_index,
                                                        old_ecmp_size,
                                                        nh_grp->adj_index,
                                                        nh_grp->ecmp_size);
                if (err)
                        return err;
        }
        return 0;
}

static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
                                       struct mlxsw_sp_nexthop *nh)
{
        struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
        char ratr_pl[MLXSW_REG_RATR_LEN];

        mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
                            true, adj_index, neigh_entry->rif);
        mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
}

static int
mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
                                  struct mlxsw_sp_nexthop_group *nh_grp,
                                  bool reallocate)
{
        u32 adj_index = nh_grp->adj_index; /* base */
        struct mlxsw_sp_nexthop *nh;
        int i;
        int err;

        for (i = 0; i < nh_grp->count; i++) {
                nh = &nh_grp->nexthops[i];

                if (!nh->should_offload) {
                        nh->offloaded = 0;
                        continue;
                }

                if (nh->update || reallocate) {
                        err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
                                                          adj_index, nh);
                        if (err)
                                return err;
                        nh->update = 0;
                        nh->offloaded = 1;
                }
                adj_index++;
        }
        return 0;
}

static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
                                     struct mlxsw_sp_fib_entry *fib_entry);

static int
mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
                                    struct mlxsw_sp_nexthop_group *nh_grp)
{
        struct mlxsw_sp_fib_entry *fib_entry;
        int err;

        list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
                err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
                if (err)
                        return err;
        }
        return 0;
}

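/* Refresh a nexthop group after nexthops were added, removed or had
 * their neighbours change state. A sketch of the flow, assuming a
 * gateway group:
 *
 *	1. Count the nexthops that should be offloaded (ecmp_size).
 *	2. If the set changed, allocate a fresh KVD linear block and
 *	   write one adjacency entry per offloadable nexthop.
 *	3. Re-point routes at the new block (or set the trap if no
 *	   nexthop is usable) and free the old block.
 */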
1418 static void
1419 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
1420                                struct mlxsw_sp_nexthop_group *nh_grp)
1421 {
1422         struct mlxsw_sp_nexthop *nh;
1423         bool offload_change = false;
1424         u32 adj_index;
1425         u16 ecmp_size = 0;
1426         bool old_adj_index_valid;
1427         u32 old_adj_index;
1428         u16 old_ecmp_size;
1429         int i;
1430         int err;
1431
1432         if (!nh_grp->gateway) {
1433                 mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1434                 return;
1435         }
1436
1437         for (i = 0; i < nh_grp->count; i++) {
1438                 nh = &nh_grp->nexthops[i];
1439
1440                 if (nh->should_offload ^ nh->offloaded) {
1441                         offload_change = true;
1442                         if (nh->should_offload)
1443                                 nh->update = 1;
1444                 }
1445                 if (nh->should_offload)
1446                         ecmp_size++;
1447         }
1448         if (!offload_change) {
1449                 /* Nothing was added or removed, so no need to reallocate. Just
1450                  * update MAC on existing adjacency indexes.
1451                  */
1452                 err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp,
1453                                                         false);
1454                 if (err) {
1455                         dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
1456                         goto set_trap;
1457                 }
1458                 return;
1459         }
1460         if (!ecmp_size)
1461                 /* No neigh of this group is connected, so we just set
1462                  * the trap and let everything flow through the kernel.
1463                  */
1464                 goto set_trap;
1465
1466         err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index);
1467         if (err) {
1468                 /* We ran out of KVD linear space, just set the
1469          * trap and let everything flow through the kernel.
1470                  */
1471                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
1472                 goto set_trap;
1473         }
1474         old_adj_index_valid = nh_grp->adj_index_valid;
1475         old_adj_index = nh_grp->adj_index;
1476         old_ecmp_size = nh_grp->ecmp_size;
1477         nh_grp->adj_index_valid = 1;
1478         nh_grp->adj_index = adj_index;
1479         nh_grp->ecmp_size = ecmp_size;
1480         err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp, true);
1481         if (err) {
1482                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
1483                 goto set_trap;
1484         }
1485
1486         if (!old_adj_index_valid) {
1487                 /* The trap was set for fib entries, so we have to call
1488          * fib entry update to unset it and use the adjacency index.
1489                  */
1490                 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1491                 if (err) {
1492                         dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
1493                         goto set_trap;
1494                 }
1495                 return;
1496         }
1497
1498         err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
1499                                              old_adj_index, old_ecmp_size);
1500         mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
1501         if (err) {
1502                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
1503                 goto set_trap;
1504         }
1505         return;
1506
1507 set_trap:
1508         old_adj_index_valid = nh_grp->adj_index_valid;
1509         nh_grp->adj_index_valid = 0;
1510         for (i = 0; i < nh_grp->count; i++) {
1511                 nh = &nh_grp->nexthops[i];
1512                 nh->offloaded = 0;
1513         }
1514         err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1515         if (err)
1516                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
1517         if (old_adj_index_valid)
1518                 mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
1519 }
1520
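/* 'should_offload' tracks whether the nexthop's neighbour is currently
 * resolved and usable; 'update' marks the nexthop so that the next group
 * refresh rewrites its adjacency entry.
 */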
1521 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
1522                                             bool removing)
1523 {
1524         if (!removing && !nh->should_offload)
1525                 nh->should_offload = 1;
1526         else if (removing && nh->offloaded)
1527                 nh->should_offload = 0;
1528         nh->update = 1;
1529 }
1530
1531 static void
1532 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
1533                               struct mlxsw_sp_neigh_entry *neigh_entry,
1534                               bool removing)
1535 {
1536         struct mlxsw_sp_nexthop *nh;
1537
1538         list_for_each_entry(nh, &neigh_entry->nexthop_list,
1539                             neigh_list_node) {
1540                 __mlxsw_sp_nexthop_neigh_update(nh, removing);
1541                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1542         }
1543 }
1544
1545 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
1546                                       struct mlxsw_sp_rif *rif)
1547 {
1548         if (nh->rif)
1549                 return;
1550
1551         nh->rif = rif;
1552         list_add(&nh->rif_list_node, &rif->nexthop_list);
1553 }
1554
1555 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
1556 {
1557         if (!nh->rif)
1558                 return;
1559
1560         list_del(&nh->rif_list_node);
1561         nh->rif = NULL;
1562 }
1563
1564 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
1565                                        struct mlxsw_sp_nexthop *nh)
1566 {
1567         struct mlxsw_sp_neigh_entry *neigh_entry;
1568         struct fib_nh *fib_nh = nh->key.fib_nh;
1569         struct neighbour *n;
1570         u8 nud_state, dead;
1571         int err;
1572
1573         if (!nh->nh_grp->gateway || nh->neigh_entry)
1574                 return 0;
1575
1576         /* Take a reference on the neigh here, ensuring that it won't
1577          * be destroyed before the nexthop entry is finished with it.
1578          * The reference is taken either in neigh_lookup() or
1579          * in neigh_create(), in case n is not found.
1580          */
1581         n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
1582         if (!n) {
1583                 n = neigh_create(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
1584                 if (IS_ERR(n))
1585                         return PTR_ERR(n);
1586                 neigh_event_send(n, NULL);
1587         }
1588         neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
1589         if (!neigh_entry) {
1590                 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
1591                 if (IS_ERR(neigh_entry)) {
1592                         err = -EINVAL;
1593                         goto err_neigh_entry_create;
1594                 }
1595         }
1596
1597         /* If that is the first nexthop connected to that neigh, add to
1598          * nexthop_neighs_list
1599          */
1600         if (list_empty(&neigh_entry->nexthop_list))
1601                 list_add_tail(&neigh_entry->nexthop_neighs_list_node,
1602                               &mlxsw_sp->router.nexthop_neighs_list);
1603
1604         nh->neigh_entry = neigh_entry;
1605         list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
1606         read_lock_bh(&n->lock);
1607         nud_state = n->nud_state;
1608         dead = n->dead;
1609         read_unlock_bh(&n->lock);
1610         __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
1611
1612         return 0;
1613
1614 err_neigh_entry_create:
1615         neigh_release(n);
1616         return err;
1617 }
1618
1619 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
1620                                         struct mlxsw_sp_nexthop *nh)
1621 {
1622         struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
1623         struct neighbour *n;
1624
1625         if (!neigh_entry)
1626                 return;
1627         n = neigh_entry->key.n;
1628
1629         __mlxsw_sp_nexthop_neigh_update(nh, true);
1630         list_del(&nh->neigh_list_node);
1631         nh->neigh_entry = NULL;
1632
1633         /* If that is the last nexthop connected to that neigh, remove from
1634          * nexthop_neighs_list
1635          */
1636         if (list_empty(&neigh_entry->nexthop_list))
1637                 list_del(&neigh_entry->nexthop_neighs_list_node);
1638
1639         if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
1640                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
1641
1642         neigh_release(n);
1643 }
1644
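/* Initialize a nexthop from the kernel's fib_nh and insert it into the
 * nexthop hashtable. If the egress netdevice has a RIF (and the route is
 * not ignored because of linkdown), the nexthop is also bound to the RIF
 * and its neighbour is resolved; otherwise, it stays in the group without
 * being offloaded.
 */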
1645 static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
1646                                  struct mlxsw_sp_nexthop_group *nh_grp,
1647                                  struct mlxsw_sp_nexthop *nh,
1648                                  struct fib_nh *fib_nh)
1649 {
1650         struct net_device *dev = fib_nh->nh_dev;
1651         struct in_device *in_dev;
1652         struct mlxsw_sp_rif *rif;
1653         int err;
1654
1655         nh->nh_grp = nh_grp;
1656         nh->key.fib_nh = fib_nh;
1657         err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
1658         if (err)
1659                 return err;
1660
1661         if (!dev)
1662                 return 0;
1663
1664         in_dev = __in_dev_get_rtnl(dev);
1665         if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
1666             fib_nh->nh_flags & RTNH_F_LINKDOWN)
1667                 return 0;
1668
1669         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
1670         if (!rif)
1671                 return 0;
1672         mlxsw_sp_nexthop_rif_init(nh, rif);
1673
1674         err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
1675         if (err)
1676                 goto err_nexthop_neigh_init;
1677
1678         return 0;
1679
1680 err_nexthop_neigh_init:
1681         mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
1682         return err;
1683 }
1684
1685 static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
1686                                   struct mlxsw_sp_nexthop *nh)
1687 {
1688         mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
1689         mlxsw_sp_nexthop_rif_fini(nh);
1690         mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
1691 }
1692
1693 static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp,
1694                                    unsigned long event, struct fib_nh *fib_nh)
1695 {
1696         struct mlxsw_sp_nexthop_key key;
1697         struct mlxsw_sp_nexthop *nh;
1698         struct mlxsw_sp_rif *rif;
1699
1700         if (mlxsw_sp->router.aborted)
1701                 return;
1702
1703         key.fib_nh = fib_nh;
1704         nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
1705         if (WARN_ON_ONCE(!nh))
1706                 return;
1707
1708         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fib_nh->nh_dev);
1709         if (!rif)
1710                 return;
1711
1712         switch (event) {
1713         case FIB_EVENT_NH_ADD:
1714                 mlxsw_sp_nexthop_rif_init(nh, rif);
1715                 mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
1716                 break;
1717         case FIB_EVENT_NH_DEL:
1718                 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
1719                 mlxsw_sp_nexthop_rif_fini(nh);
1720                 break;
1721         }
1722
1723         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1724 }
1725
1726 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
1727                                            struct mlxsw_sp_rif *rif)
1728 {
1729         struct mlxsw_sp_nexthop *nh, *tmp;
1730
1731         list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
1732                 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
1733                 mlxsw_sp_nexthop_rif_fini(nh);
1734                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1735         }
1736 }
1737
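/* A nexthop group mirrors a kernel fib_info and is shared by all FIB
 * entries using that fib_info. The group embeds a trailing array of
 * fi->fib_nhs nexthops and is considered a gateway group when its first
 * nexthop has link scope.
 */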
1738 static struct mlxsw_sp_nexthop_group *
1739 mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
1740 {
1741         struct mlxsw_sp_nexthop_group *nh_grp;
1742         struct mlxsw_sp_nexthop *nh;
1743         struct fib_nh *fib_nh;
1744         size_t alloc_size;
1745         int i;
1746         int err;
1747
1748         alloc_size = sizeof(*nh_grp) +
1749                      fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
1750         nh_grp = kzalloc(alloc_size, GFP_KERNEL);
1751         if (!nh_grp)
1752                 return ERR_PTR(-ENOMEM);
1753         INIT_LIST_HEAD(&nh_grp->fib_list);
1754         nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK;
1755         nh_grp->count = fi->fib_nhs;
1756         nh_grp->key.fi = fi;
1757         for (i = 0; i < nh_grp->count; i++) {
1758                 nh = &nh_grp->nexthops[i];
1759                 fib_nh = &fi->fib_nh[i];
1760                 err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh);
1761                 if (err)
1762                         goto err_nexthop_init;
1763         }
1764         err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
1765         if (err)
1766                 goto err_nexthop_group_insert;
1767         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
1768         return nh_grp;
1769
1770 err_nexthop_group_insert:
1771 err_nexthop_init:
1772         for (i--; i >= 0; i--) {
1773                 nh = &nh_grp->nexthops[i];
1774                 mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
1775         }
1776         kfree(nh_grp);
1777         return ERR_PTR(err);
1778 }
1779
1780 static void
1781 mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
1782                                struct mlxsw_sp_nexthop_group *nh_grp)
1783 {
1784         struct mlxsw_sp_nexthop *nh;
1785         int i;
1786
1787         mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
1788         for (i = 0; i < nh_grp->count; i++) {
1789                 nh = &nh_grp->nexthops[i];
1790                 mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
1791         }
1792         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
1793         WARN_ON_ONCE(nh_grp->adj_index_valid);
1794         kfree(nh_grp);
1795 }
1796
1797 static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
1798                                       struct mlxsw_sp_fib_entry *fib_entry,
1799                                       struct fib_info *fi)
1800 {
1801         struct mlxsw_sp_nexthop_group_key key;
1802         struct mlxsw_sp_nexthop_group *nh_grp;
1803
1804         key.fi = fi;
1805         nh_grp = mlxsw_sp_nexthop_group_lookup(mlxsw_sp, key);
1806         if (!nh_grp) {
1807                 nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi);
1808                 if (IS_ERR(nh_grp))
1809                         return PTR_ERR(nh_grp);
1810         }
1811         list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
1812         fib_entry->nh_group = nh_grp;
1813         return 0;
1814 }
1815
1816 static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
1817                                        struct mlxsw_sp_fib_entry *fib_entry)
1818 {
1819         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
1820
1821         list_del(&fib_entry->nexthop_group_node);
1822         if (!list_empty(&nh_grp->fib_list))
1823                 return;
1824         mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
1825 }
1826
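/* Only TOS 0 routes are candidates for offload. Remote (gateway) entries
 * additionally require a valid adjacency index, and local entries a RIF.
 */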
1827 static bool
1828 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
1829 {
1830         struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
1831
1832         if (fib_entry->params.tos)
1833                 return false;
1834
1835         switch (fib_entry->type) {
1836         case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
1837                 return !!nh_group->adj_index_valid;
1838         case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
1839                 return !!nh_group->nh_rif;
1840         default:
1841                 return false;
1842         }
1843 }
1844
1845 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
1846 {
1847         fib_entry->offloaded = true;
1848
1849         switch (fib_entry->fib_node->fib->proto) {
1850         case MLXSW_SP_L3_PROTO_IPV4:
1851                 fib_info_offload_inc(fib_entry->nh_group->key.fi);
1852                 break;
1853         case MLXSW_SP_L3_PROTO_IPV6:
1854                 WARN_ON_ONCE(1);
1855         }
1856 }
1857
1858 static void
1859 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
1860 {
1861         switch (fib_entry->fib_node->fib->proto) {
1862         case MLXSW_SP_L3_PROTO_IPV4:
1863                 fib_info_offload_dec(fib_entry->nh_group->key.fi);
1864                 break;
1865         case MLXSW_SP_L3_PROTO_IPV6:
1866                 WARN_ON_ONCE(1);
1867         }
1868
1869         fib_entry->offloaded = false;
1870 }
1871
1872 static void
1873 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
1874                                    enum mlxsw_reg_ralue_op op, int err)
1875 {
1876         switch (op) {
1877         case MLXSW_REG_RALUE_OP_WRITE_DELETE:
1878                 if (!fib_entry->offloaded)
1879                         return;
1880                 return mlxsw_sp_fib_entry_offload_unset(fib_entry);
1881         case MLXSW_REG_RALUE_OP_WRITE_WRITE:
1882                 if (err)
1883                         return;
1884                 if (mlxsw_sp_fib_entry_should_offload(fib_entry) &&
1885                     !fib_entry->offloaded)
1886                         mlxsw_sp_fib_entry_offload_set(fib_entry);
1887                 else if (!mlxsw_sp_fib_entry_should_offload(fib_entry) &&
1888                          fib_entry->offloaded)
1889                         mlxsw_sp_fib_entry_offload_unset(fib_entry);
1890                 return;
1891         default:
1892                 return;
1893         }
1894 }
1895
1896 static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
1897                                          struct mlxsw_sp_fib_entry *fib_entry,
1898                                          enum mlxsw_reg_ralue_op op)
1899 {
1900         char ralue_pl[MLXSW_REG_RALUE_LEN];
1901         struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
1902         u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
1903         enum mlxsw_reg_ralue_trap_action trap_action;
1904         u16 trap_id = 0;
1905         u32 adjacency_index = 0;
1906         u16 ecmp_size = 0;
1907
1908         /* In case the nexthop group adjacency index is valid, use it
1909          * with the provided ECMP size. Otherwise, set up a trap and
1910          * pass traffic to the kernel.
1911          */
1912         if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
1913                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
1914                 adjacency_index = fib_entry->nh_group->adj_index;
1915                 ecmp_size = fib_entry->nh_group->ecmp_size;
1916         } else {
1917                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
1918                 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
1919         }
1920
1921         mlxsw_reg_ralue_pack4(ralue_pl,
1922                               (enum mlxsw_reg_ralxx_protocol) fib->proto, op,
1923                               fib->vr->id, fib_entry->fib_node->key.prefix_len,
1924                               *p_dip);
1925         mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
1926                                         adjacency_index, ecmp_size);
1927         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1928 }
1929
1930 static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
1931                                         struct mlxsw_sp_fib_entry *fib_entry,
1932                                         enum mlxsw_reg_ralue_op op)
1933 {
1934         struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
1935         struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
1936         enum mlxsw_reg_ralue_trap_action trap_action;
1937         char ralue_pl[MLXSW_REG_RALUE_LEN];
1938         u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
1939         u16 trap_id = 0;
1940         u16 rif_index = 0;
1941
1942         if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
1943                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
1944                 rif_index = rif->rif_index;
1945         } else {
1946                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
1947                 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
1948         }
1949
1950         mlxsw_reg_ralue_pack4(ralue_pl,
1951                               (enum mlxsw_reg_ralxx_protocol) fib->proto, op,
1952                               fib->vr->id, fib_entry->fib_node->key.prefix_len,
1953                               *p_dip);
1954         mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
1955                                        rif_index);
1956         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1957 }
1958
1959 static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp,
1960                                        struct mlxsw_sp_fib_entry *fib_entry,
1961                                        enum mlxsw_reg_ralue_op op)
1962 {
1963         struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
1964         char ralue_pl[MLXSW_REG_RALUE_LEN];
1965         u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
1966
1967         mlxsw_reg_ralue_pack4(ralue_pl,
1968                               (enum mlxsw_reg_ralxx_protocol) fib->proto, op,
1969                               fib->vr->id, fib_entry->fib_node->key.prefix_len,
1970                               *p_dip);
1971         mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
1972         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1973 }
1974
1975 static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp,
1976                                   struct mlxsw_sp_fib_entry *fib_entry,
1977                                   enum mlxsw_reg_ralue_op op)
1978 {
1979         switch (fib_entry->type) {
1980         case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
1981                 return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op);
1982         case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
1983                 return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
1984         case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
1985                 return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op);
1986         }
1987         return -EINVAL;
1988 }
1989
1990 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
1991                                  struct mlxsw_sp_fib_entry *fib_entry,
1992                                  enum mlxsw_reg_ralue_op op)
1993 {
1994         int err = -EINVAL;
1995
1996         switch (fib_entry->fib_node->fib->proto) {
1997         case MLXSW_SP_L3_PROTO_IPV4:
1998                 err = mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op);
1999                 break;
2000         case MLXSW_SP_L3_PROTO_IPV6:
2001                 return err;
2002         }
2003         mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
2004         return err;
2005 }
2006
2007 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
2008                                      struct mlxsw_sp_fib_entry *fib_entry)
2009 {
2010         return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
2011                                      MLXSW_REG_RALUE_OP_WRITE_WRITE);
2012 }
2013
2014 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
2015                                   struct mlxsw_sp_fib_entry *fib_entry)
2016 {
2017         return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
2018                                      MLXSW_REG_RALUE_OP_WRITE_DELETE);
2019 }
2020
2021 static int
2022 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
2023                              const struct fib_entry_notifier_info *fen_info,
2024                              struct mlxsw_sp_fib_entry *fib_entry)
2025 {
2026         struct fib_info *fi = fen_info->fi;
2027
2028         switch (fen_info->type) {
2029         case RTN_BROADCAST: /* fall through */
2030         case RTN_LOCAL:
2031                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
2032                 return 0;
2033         case RTN_UNREACHABLE: /* fall through */
2034         case RTN_BLACKHOLE: /* fall through */
2035         case RTN_PROHIBIT:
2036                 /* Packets hitting these routes need to be trapped, but
2037                  * can be trapped with a lower priority than packets directed
2038                  * at the host, so use action type local instead of trap.
2039                  */
2040                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
2041                 return 0;
2042         case RTN_UNICAST:
2043                 if (fi->fib_nh->nh_scope != RT_SCOPE_LINK)
2044                         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
2045                 else
2046                         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
2047                 return 0;
2048         default:
2049                 return -EINVAL;
2050         }
2051 }
2052
2053 static struct mlxsw_sp_fib_entry *
2054 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
2055                            struct mlxsw_sp_fib_node *fib_node,
2056                            const struct fib_entry_notifier_info *fen_info)
2057 {
2058         struct mlxsw_sp_fib_entry *fib_entry;
2059         int err;
2060
2061         fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL);
2062         if (!fib_entry) {
2063                 err = -ENOMEM;
2064                 goto err_fib_entry_alloc;
2065         }
2066
2067         err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
2068         if (err)
2069                 goto err_fib4_entry_type_set;
2070
2071         err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fen_info->fi);
2072         if (err)
2073                 goto err_nexthop_group_get;
2074
2075         fib_entry->params.prio = fen_info->fi->fib_priority;
2076         fib_entry->params.tb_id = fen_info->tb_id;
2077         fib_entry->params.type = fen_info->type;
2078         fib_entry->params.tos = fen_info->tos;
2079
2080         fib_entry->fib_node = fib_node;
2081
2082         return fib_entry;
2083
2084 err_nexthop_group_get:
2085 err_fib4_entry_type_set:
2086         kfree(fib_entry);
2087 err_fib_entry_alloc:
2088         return ERR_PTR(err);
2089 }
2090
2091 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
2092                                         struct mlxsw_sp_fib_entry *fib_entry)
2093 {
2094         mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
2095         kfree(fib_entry);
2096 }
2097
2098 static struct mlxsw_sp_fib_node *
2099 mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
2100                        const struct fib_entry_notifier_info *fen_info);
2101
2102 static struct mlxsw_sp_fib_entry *
2103 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
2104                            const struct fib_entry_notifier_info *fen_info)
2105 {
2106         struct mlxsw_sp_fib_entry *fib_entry;
2107         struct mlxsw_sp_fib_node *fib_node;
2108
2109         fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
2110         if (IS_ERR(fib_node))
2111                 return NULL;
2112
2113         list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
2114                 if (fib_entry->params.tb_id == fen_info->tb_id &&
2115                     fib_entry->params.tos == fen_info->tos &&
2116                     fib_entry->params.type == fen_info->type &&
2117                     fib_entry->nh_group->key.fi == fen_info->fi) {
2118                         return fib_entry;
2119                 }
2120         }
2121
2122         return NULL;
2123 }
2124
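/* FIB nodes (one per prefix and prefix length within a table) are kept
 * in an rhashtable keyed by mlxsw_sp_fib_key, so a route notification
 * can quickly find the node it belongs to.
 */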
2125 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
2126         .key_offset = offsetof(struct mlxsw_sp_fib_node, key),
2127         .head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
2128         .key_len = sizeof(struct mlxsw_sp_fib_key),
2129         .automatic_shrinking = true,
2130 };
2131
2132 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
2133                                     struct mlxsw_sp_fib_node *fib_node)
2134 {
2135         return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
2136                                       mlxsw_sp_fib_ht_params);
2137 }
2138
2139 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
2140                                      struct mlxsw_sp_fib_node *fib_node)
2141 {
2142         rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
2143                                mlxsw_sp_fib_ht_params);
2144 }
2145
2146 static struct mlxsw_sp_fib_node *
2147 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
2148                          size_t addr_len, unsigned char prefix_len)
2149 {
2150         struct mlxsw_sp_fib_key key;
2151
2152         memset(&key, 0, sizeof(key));
2153         memcpy(key.addr, addr, addr_len);
2154         key.prefix_len = prefix_len;
2155         return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
2156 }
2157
2158 static struct mlxsw_sp_fib_node *
2159 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
2160                          size_t addr_len, unsigned char prefix_len)
2161 {
2162         struct mlxsw_sp_fib_node *fib_node;
2163
2164         fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
2165         if (!fib_node)
2166                 return NULL;
2167
2168         INIT_LIST_HEAD(&fib_node->entry_list);
2169         list_add(&fib_node->list, &fib->node_list);
2170         memcpy(fib_node->key.addr, addr, addr_len);
2171         fib_node->key.prefix_len = prefix_len;
2172
2173         return fib_node;
2174 }
2175
2176 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
2177 {
2178         list_del(&fib_node->list);
2179         WARN_ON(!list_empty(&fib_node->entry_list));
2180         kfree(fib_node);
2181 }
2182
2183 static bool
2184 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
2185                                  const struct mlxsw_sp_fib_entry *fib_entry)
2186 {
2187         return list_first_entry(&fib_node->entry_list,
2188                                 struct mlxsw_sp_fib_entry, list) == fib_entry;
2189 }
2190
2191 static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node)
2192 {
2193         unsigned char prefix_len = fib_node->key.prefix_len;
2194         struct mlxsw_sp_fib *fib = fib_node->fib;
2195
2196         if (fib->prefix_ref_count[prefix_len]++ == 0)
2197                 mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
2198 }
2199
2200 static void mlxsw_sp_fib_node_prefix_dec(struct mlxsw_sp_fib_node *fib_node)
2201 {
2202         unsigned char prefix_len = fib_node->key.prefix_len;
2203         struct mlxsw_sp_fib *fib = fib_node->fib;
2204
2205         if (--fib->prefix_ref_count[prefix_len] == 0)
2206                 mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
2207 }
2208
2209 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
2210                                   struct mlxsw_sp_fib_node *fib_node,
2211                                   struct mlxsw_sp_fib *fib)
2212 {
2213         struct mlxsw_sp_prefix_usage req_prefix_usage;
2214         struct mlxsw_sp_lpm_tree *lpm_tree;
2215         int err;
2216
2217         err = mlxsw_sp_fib_node_insert(fib, fib_node);
2218         if (err)
2219                 return err;
2220         fib_node->fib = fib;
2221
2222         mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &fib->prefix_usage);
2223         mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
2224
2225         if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) {
2226                 err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, fib,
2227                                                  &req_prefix_usage);
2228                 if (err)
2229                         goto err_tree_check;
2230         } else {
2231                 lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
2232                                                  fib->proto);
2233                 if (IS_ERR(lpm_tree))
2234                         return PTR_ERR(lpm_tree);
2235                 fib->lpm_tree = lpm_tree;
2236                 err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib);
2237                 if (err)
2238                         goto err_tree_bind;
2239         }
2240
2241         mlxsw_sp_fib_node_prefix_inc(fib_node);
2242
2243         return 0;
2244
2245 err_tree_bind:
2246         fib->lpm_tree = NULL;
2247         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
2248 err_tree_check:
2249         fib_node->fib = NULL;
2250         mlxsw_sp_fib_node_remove(fib, fib_node);
2251         return err;
2252 }
2253
2254 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
2255                                    struct mlxsw_sp_fib_node *fib_node)
2256 {
2257         struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
2258         struct mlxsw_sp_fib *fib = fib_node->fib;
2259
2260         mlxsw_sp_fib_node_prefix_dec(fib_node);
2261
2262         if (mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) {
2263                 mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
2264                 fib->lpm_tree = NULL;
2265                 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
2266         } else {
2267                 mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, fib, &fib->prefix_usage);
2268         }
2269
2270         fib_node->fib = NULL;
2271         mlxsw_sp_fib_node_remove(fib, fib_node);
2272 }
2273
2274 static struct mlxsw_sp_fib_node *
2275 mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
2276                        const struct fib_entry_notifier_info *fen_info)
2277 {
2278         struct mlxsw_sp_fib_node *fib_node;
2279         struct mlxsw_sp_fib *fib;
2280         struct mlxsw_sp_vr *vr;
2281         int err;
2282
2283         vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->tb_id);
2284         if (IS_ERR(vr))
2285                 return ERR_CAST(vr);
2286         fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
2287
2288         fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
2289                                             sizeof(fen_info->dst),
2290                                             fen_info->dst_len);
2291         if (fib_node)
2292                 return fib_node;
2293
2294         fib_node = mlxsw_sp_fib_node_create(fib, &fen_info->dst,
2295                                             sizeof(fen_info->dst),
2296                                             fen_info->dst_len);
2297         if (!fib_node) {
2298                 err = -ENOMEM;
2299                 goto err_fib_node_create;
2300         }
2301
2302         err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
2303         if (err)
2304                 goto err_fib_node_init;
2305
2306         return fib_node;
2307
2308 err_fib_node_init:
2309         mlxsw_sp_fib_node_destroy(fib_node);
2310 err_fib_node_create:
2311         mlxsw_sp_vr_put(vr);
2312         return ERR_PTR(err);
2313 }
2314
2315 static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp,
2316                                    struct mlxsw_sp_fib_node *fib_node)
2317 {
2318         struct mlxsw_sp_vr *vr = fib_node->fib->vr;
2319
2320         if (!list_empty(&fib_node->entry_list))
2321                 return;
2322         mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
2323         mlxsw_sp_fib_node_destroy(fib_node);
2324         mlxsw_sp_vr_put(vr);
2325 }
2326
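/* A node's entry list is ordered by decreasing table ID, then decreasing
 * TOS, then increasing priority, so the first entry is the one the
 * kernel would prefer. Return the first existing entry that the new
 * parameters should precede, or NULL if no such entry exists.
 */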
2327 static struct mlxsw_sp_fib_entry *
2328 mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
2329                               const struct mlxsw_sp_fib_entry_params *params)
2330 {
2331         struct mlxsw_sp_fib_entry *fib_entry;
2332
2333         list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
2334                 if (fib_entry->params.tb_id > params->tb_id)
2335                         continue;
2336                 if (fib_entry->params.tb_id != params->tb_id)
2337                         break;
2338                 if (fib_entry->params.tos > params->tos)
2339                         continue;
2340                 if (fib_entry->params.prio >= params->prio ||
2341                     fib_entry->params.tos < params->tos)
2342                         return fib_entry;
2343         }
2344
2345         return NULL;
2346 }
2347
2348 static int mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib_entry *fib_entry,
2349                                           struct mlxsw_sp_fib_entry *new_entry)
2350 {
2351         struct mlxsw_sp_fib_node *fib_node;
2352
2353         if (WARN_ON(!fib_entry))
2354                 return -EINVAL;
2355
2356         fib_node = fib_entry->fib_node;
2357         list_for_each_entry_from(fib_entry, &fib_node->entry_list, list) {
2358                 if (fib_entry->params.tb_id != new_entry->params.tb_id ||
2359                     fib_entry->params.tos != new_entry->params.tos ||
2360                     fib_entry->params.prio != new_entry->params.prio)
2361                         break;
2362         }
2363
2364         list_add_tail(&new_entry->list, &fib_entry->list);
2365         return 0;
2366 }
2367
2368 static int
2369 mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib_node *fib_node,
2370                                struct mlxsw_sp_fib_entry *new_entry,
2371                                bool replace, bool append)
2372 {
2373         struct mlxsw_sp_fib_entry *fib_entry;
2374
2375         fib_entry = mlxsw_sp_fib4_node_entry_find(fib_node, &new_entry->params);
2376
2377         if (append)
2378                 return mlxsw_sp_fib4_node_list_append(fib_entry, new_entry);
2379         if (replace && WARN_ON(!fib_entry))
2380                 return -EINVAL;
2381
2382         /* Insert the new entry before the replaced one, so that we
2383          * can later remove the latter.
2384          */
2385         if (fib_entry) {
2386                 list_add_tail(&new_entry->list, &fib_entry->list);
2387         } else {
2388                 struct mlxsw_sp_fib_entry *last;
2389
2390                 list_for_each_entry(last, &fib_node->entry_list, list) {
2391                         if (new_entry->params.tb_id > last->params.tb_id)
2392                                 break;
2393                         fib_entry = last;
2394                 }
2395
2396                 if (fib_entry)
2397                         list_add(&new_entry->list, &fib_entry->list);
2398                 else
2399                         list_add(&new_entry->list, &fib_node->entry_list);
2400         }
2401
2402         return 0;
2403 }
2404
2405 static void
2406 mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib_entry *fib_entry)
2407 {
2408         list_del(&fib_entry->list);
2409 }
2410
2411 static int
2412 mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp,
2413                              const struct mlxsw_sp_fib_node *fib_node,
2414                              struct mlxsw_sp_fib_entry *fib_entry)
2415 {
2416         if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
2417                 return 0;
2418
2419         /* To prevent packet loss, overwrite the previously offloaded
2420          * entry.
2421          */
2422         if (!list_is_singular(&fib_node->entry_list)) {
2423                 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
2424                 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
2425
2426                 mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
2427         }
2428
2429         return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2430 }
2431
2432 static void
2433 mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp,
2434                              const struct mlxsw_sp_fib_node *fib_node,
2435                              struct mlxsw_sp_fib_entry *fib_entry)
2436 {
2437         if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
2438                 return;
2439
2440         /* Promote the next entry by overwriting the deleted entry */
2441         if (!list_is_singular(&fib_node->entry_list)) {
2442                 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
2443                 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
2444
2445                 mlxsw_sp_fib_entry_update(mlxsw_sp, n);
2446                 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
2447                 return;
2448         }
2449
2450         mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
2451 }
2452
2453 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
2454                                          struct mlxsw_sp_fib_entry *fib_entry,
2455                                          bool replace, bool append)
2456 {
2457         struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
2458         int err;
2459
2460         err = mlxsw_sp_fib4_node_list_insert(fib_node, fib_entry, replace,
2461                                              append);
2462         if (err)
2463                 return err;
2464
2465         err = mlxsw_sp_fib4_node_entry_add(mlxsw_sp, fib_node, fib_entry);
2466         if (err)
2467                 goto err_fib4_node_entry_add;
2468
2469         return 0;
2470
2471 err_fib4_node_entry_add:
2472         mlxsw_sp_fib4_node_list_remove(fib_entry);
2473         return err;
2474 }
2475
2476 static void
2477 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
2478                                 struct mlxsw_sp_fib_entry *fib_entry)
2479 {
2480         struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
2481
2482         mlxsw_sp_fib4_node_entry_del(mlxsw_sp, fib_node, fib_entry);
2483         mlxsw_sp_fib4_node_list_remove(fib_entry);
2484 }
2485
2486 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
2487                                         struct mlxsw_sp_fib_entry *fib_entry,
2488                                         bool replace)
2489 {
2490         struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
2491         struct mlxsw_sp_fib_entry *replaced;
2492
2493         if (!replace)
2494                 return;
2495
2496         /* We inserted the new entry before the replaced one */
2497         replaced = list_next_entry(fib_entry, list);
2498
2499         mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
2500         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
2501         mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
2502 }
2503
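/* Handle an IPv4 route add/replace/append notification: get or create
 * the FIB node for the prefix, create the entry, link it into the node's
 * ordered list and, if it became the node's first entry, program it into
 * the device. With 'replace' set, the entry it shadows is then unlinked
 * and destroyed.
 */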
2504 static int
2505 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
2506                          const struct fib_entry_notifier_info *fen_info,
2507                          bool replace, bool append)
2508 {
2509         struct mlxsw_sp_fib_entry *fib_entry;
2510         struct mlxsw_sp_fib_node *fib_node;
2511         int err;
2512
2513         if (mlxsw_sp->router.aborted)
2514                 return 0;
2515
2516         fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
2517         if (IS_ERR(fib_node)) {
2518                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
2519                 return PTR_ERR(fib_node);
2520         }
2521
2522         fib_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
2523         if (IS_ERR(fib_entry)) {
2524                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
2525                 err = PTR_ERR(fib_entry);
2526                 goto err_fib4_entry_create;
2527         }
2528
2529         err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib_entry, replace,
2530                                             append);
2531         if (err) {
2532                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
2533                 goto err_fib4_node_entry_link;
2534         }
2535
2536         mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib_entry, replace);
2537
2538         return 0;
2539
2540 err_fib4_node_entry_link:
2541         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
2542 err_fib4_entry_create:
2543         mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
2544         return err;
2545 }
2546
2547 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
2548                                      struct fib_entry_notifier_info *fen_info)
2549 {
2550         struct mlxsw_sp_fib_entry *fib_entry;
2551         struct mlxsw_sp_fib_node *fib_node;
2552
2553         if (mlxsw_sp->router.aborted)
2554                 return;
2555
2556         fib_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
2557         if (WARN_ON(!fib_entry))
2558                 return;
2559         fib_node = fib_entry->fib_node;
2560
2561         mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
2562         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
2563         mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
2564 }
2565
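/* After an abort, routing is handed back to the kernel: bind the minimal
 * LPM tree to every active virtual router and install a default catch-all
 * route with an ip2me action in each, trapping all traffic to the CPU.
 */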
2566 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
2567 {
2568         char ralta_pl[MLXSW_REG_RALTA_LEN];
2569         char ralst_pl[MLXSW_REG_RALST_LEN];
2570         int i, err;
2571
2572         mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4,
2573                              MLXSW_SP_LPM_TREE_MIN);
2574         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
2575         if (err)
2576                 return err;
2577
2578         mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN);
2579         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
2580         if (err)
2581                 return err;
2582
2583         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
2584                 struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[i];
2585                 char raltb_pl[MLXSW_REG_RALTB_LEN];
2586                 char ralue_pl[MLXSW_REG_RALUE_LEN];
2587
2588                 if (!mlxsw_sp_vr_is_used(vr))
2589                         continue;
2590
2591                 mlxsw_reg_raltb_pack(raltb_pl, vr->id,
2592                                      MLXSW_REG_RALXX_PROTOCOL_IPV4,
2593                                      MLXSW_SP_LPM_TREE_MIN);
2594                 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
2595                                       raltb_pl);
2596                 if (err)
2597                         return err;
2598
2599                 mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4,
2600                                       MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0,
2601                                       0);
2602                 mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
2603                 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
2604                                       ralue_pl);
2605                 if (err)
2606                         return err;
2607         }
2608
2609         return 0;
2610 }
2611
2612 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
2613                                      struct mlxsw_sp_fib_node *fib_node)
2614 {
2615         struct mlxsw_sp_fib_entry *fib_entry, *tmp;
2616
2617         list_for_each_entry_safe(fib_entry, tmp, &fib_node->entry_list, list) {
2618                 bool do_break = &tmp->list == &fib_node->entry_list;
2619
2620                 mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
2621                 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
2622                 mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
2623                 /* Break when entry list is empty and node was freed.
2624                  * Otherwise, we'll access freed memory in the next
2625                  * iteration.
2626                  */
2627                 if (do_break)
2628                         break;
2629         }
2630 }
2631
2632 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
2633                                     struct mlxsw_sp_fib_node *fib_node)
2634 {
2635         switch (fib_node->fib->proto) {
2636         case MLXSW_SP_L3_PROTO_IPV4:
2637                 mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
2638                 break;
2639         case MLXSW_SP_L3_PROTO_IPV6:
2640                 WARN_ON_ONCE(1);
2641                 break;
2642         }
2643 }
2644
2645 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
2646                                   struct mlxsw_sp_vr *vr,
2647                                   enum mlxsw_sp_l3proto proto)
2648 {
2649         struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
2650         struct mlxsw_sp_fib_node *fib_node, *tmp;
2651
2652         list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
2653                 bool do_break = &tmp->list == &fib->node_list;
2654
2655                 mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
2656                 if (do_break)
2657                         break;
2658         }
2659 }
2660
2661 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
2662 {
2663         int i;
2664
2665         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
2666                 struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[i];
2667
2668                 if (!mlxsw_sp_vr_is_used(vr))
2669                         continue;
2670                 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
2671         }
2672 }
2673
2674 static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
2675 {
2676         int err;
2677
2678         if (mlxsw_sp->router.aborted)
2679                 return;
2680         dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
2681         mlxsw_sp_router_fib_flush(mlxsw_sp);
2682         mlxsw_sp->router.aborted = true;
2683         err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
2684         if (err)
2685                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
2686 }
2687
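/* FIB notifications arrive in an atomic context, so each event is copied
 * into a work item, with a reference taken on the underlying fib_info or
 * FIB rule, and processed later under RTNL by
 * mlxsw_sp_router_fib_event_work().
 */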
2688 struct mlxsw_sp_fib_event_work {
2689         struct work_struct work;
2690         union {
2691                 struct fib_entry_notifier_info fen_info;
2692                 struct fib_rule_notifier_info fr_info;
2693                 struct fib_nh_notifier_info fnh_info;
2694         };
2695         struct mlxsw_sp *mlxsw_sp;
2696         unsigned long event;
2697 };
2698
2699 static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
2700 {
2701         struct mlxsw_sp_fib_event_work *fib_work =
2702                 container_of(work, struct mlxsw_sp_fib_event_work, work);
2703         struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
2704         struct fib_rule *rule;
2705         bool replace, append;
2706         int err;
2707
2708         /* Protect internal structures from changes */
2709         rtnl_lock();
2710         switch (fib_work->event) {
2711         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
2712         case FIB_EVENT_ENTRY_APPEND: /* fall through */
2713         case FIB_EVENT_ENTRY_ADD:
2714                 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
2715                 append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
2716                 err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
2717                                                replace, append);
2718                 if (err)
2719                         mlxsw_sp_router_fib4_abort(mlxsw_sp);
2720                 fib_info_put(fib_work->fen_info.fi);
2721                 break;
2722         case FIB_EVENT_ENTRY_DEL:
2723                 mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
2724                 fib_info_put(fib_work->fen_info.fi);
2725                 break;
2726         case FIB_EVENT_RULE_ADD: /* fall through */
2727         case FIB_EVENT_RULE_DEL:
2728                 rule = fib_work->fr_info.rule;
2729                 if (!fib4_rule_default(rule) && !rule->l3mdev)
2730                         mlxsw_sp_router_fib4_abort(mlxsw_sp);
2731                 fib_rule_put(rule);
2732                 break;
2733         case FIB_EVENT_NH_ADD: /* fall through */
2734         case FIB_EVENT_NH_DEL:
2735                 mlxsw_sp_nexthop_event(mlxsw_sp, fib_work->event,
2736                                        fib_work->fnh_info.fib_nh);
2737                 fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
2738                 break;
2739         }
2740         rtnl_unlock();
2741         kfree(fib_work);
2742 }
2743
2744 /* Called with rcu_read_lock() */
2745 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
2746                                      unsigned long event, void *ptr)
2747 {
2748         struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
2749         struct mlxsw_sp_fib_event_work *fib_work;
2750         struct fib_notifier_info *info = ptr;
2751
2752         if (!net_eq(info->net, &init_net))
2753                 return NOTIFY_DONE;
2754
2755         fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
2756         if (WARN_ON(!fib_work))
2757                 return NOTIFY_BAD;
2758
2759         INIT_WORK(&fib_work->work, mlxsw_sp_router_fib_event_work);
2760         fib_work->mlxsw_sp = mlxsw_sp;
2761         fib_work->event = event;
2762
2763         switch (event) {
2764         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
2765         case FIB_EVENT_ENTRY_APPEND: /* fall through */
2766         case FIB_EVENT_ENTRY_ADD: /* fall through */
2767         case FIB_EVENT_ENTRY_DEL:
2768                 memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
2769                 /* Take a reference on fib_info to prevent it from being
2770                  * freed while the work is queued. Release it afterwards.
2771                  */
2772                 fib_info_hold(fib_work->fen_info.fi);
2773                 break;
2774         case FIB_EVENT_RULE_ADD: /* fall through */
2775         case FIB_EVENT_RULE_DEL:
2776                 memcpy(&fib_work->fr_info, ptr, sizeof(fib_work->fr_info));
2777                 fib_rule_get(fib_work->fr_info.rule);
2778                 break;
2779         case FIB_EVENT_NH_ADD: /* fall through */
2780         case FIB_EVENT_NH_DEL:
2781                 memcpy(&fib_work->fnh_info, ptr, sizeof(fib_work->fnh_info));
2782                 fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
2783                 break;
2784         }
2785
2786         mlxsw_core_schedule_work(&fib_work->work);
2787
2788         return NOTIFY_DONE;
2789 }
2790
2791 static struct mlxsw_sp_rif *
2792 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
2793                          const struct net_device *dev)
2794 {
2795         int i;
2796
2797         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
2798                 if (mlxsw_sp->rifs[i] && mlxsw_sp->rifs[i]->dev == dev)
2799                         return mlxsw_sp->rifs[i];
2800
2801         return NULL;
2802 }
2803
2804 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
2805 {
2806         char ritr_pl[MLXSW_REG_RITR_LEN];
2807         int err;
2808
2809         mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
2810         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
2811         if (WARN_ON_ONCE(err))
2812                 return err;
2813
2814         mlxsw_reg_ritr_enable_set(ritr_pl, false);
2815         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
2816 }
2817
2818 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2819                                           struct mlxsw_sp_rif *rif)
2820 {
2821         mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
2822         mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
2823         mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
2824 }
2825
2826 static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif,
2827                                        const struct in_device *in_dev,
2828                                        unsigned long event)
2829 {
2830         switch (event) {
2831         case NETDEV_UP:
2832                 if (!rif)
2833                         return true;
2834                 return false;
2835         case NETDEV_DOWN:
2836                 if (rif && !in_dev->ifa_list &&
2837                     !netif_is_l3_slave(rif->dev))
2838                         return true;
2839                 /* It is possible we already removed the RIF ourselves
2840                  * if it was assigned to a netdev that is now a bridge
2841                  * or LAG slave.
2842                  */
2843                 return false;
2844         }
2845
2846         return false;
2847 }
2848
2849 #define MLXSW_SP_INVALID_INDEX_RIF 0xffff
2850 static int mlxsw_sp_avail_rif_get(struct mlxsw_sp *mlxsw_sp)
2851 {
2852         int i;
2853
2854         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
2855                 if (!mlxsw_sp->rifs[i])
2856                         return i;
2857
2858         return MLXSW_SP_INVALID_INDEX_RIF;
2859 }
2860
2861 static void mlxsw_sp_vport_rif_sp_attr_get(struct mlxsw_sp_port *mlxsw_sp_vport,
2862                                            bool *p_lagged, u16 *p_system_port)
2863 {
2864         u8 local_port = mlxsw_sp_vport->local_port;
2865
2866         *p_lagged = mlxsw_sp_vport->lagged;
2867         *p_system_port = *p_lagged ? mlxsw_sp_vport->lag_id : local_port;
2868 }
2869
2870 static int mlxsw_sp_vport_rif_sp_op(struct mlxsw_sp_port *mlxsw_sp_vport,
2871                                     u16 vr_id, struct net_device *l3_dev,
2872                                     u16 rif_index, bool create)
2873 {
2874         struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
2875         bool lagged;
2876         char ritr_pl[MLXSW_REG_RITR_LEN];
2877         u16 system_port;
2878
2879         mlxsw_reg_ritr_pack(ritr_pl, create, MLXSW_REG_RITR_SP_IF, rif_index,
2880                             vr_id, l3_dev->mtu, l3_dev->dev_addr);
2881
2882         mlxsw_sp_vport_rif_sp_attr_get(mlxsw_sp_vport, &lagged, &system_port);
2883         mlxsw_reg_ritr_sp_if_pack(ritr_pl, lagged, system_port,
2884                                   mlxsw_sp_vport_vid_get(mlxsw_sp_vport));
2885
2886         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
2887 }
2888
2889 static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport);
2890
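/* rFIDs are mapped 1:1 from the RIF index, starting at
 * MLXSW_SP_RFID_BASE (e.g. RIF 0 is backed by FID MLXSW_SP_RFID_BASE).
 */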
2891 static u16 mlxsw_sp_rif_sp_to_fid(u16 rif_index)
2892 {
2893         return MLXSW_SP_RFID_BASE + rif_index;
2894 }
2895
2896 static struct mlxsw_sp_fid *
2897 mlxsw_sp_rfid_alloc(u16 fid, struct net_device *l3_dev)
2898 {
2899         struct mlxsw_sp_fid *f;
2900
2901         f = kzalloc(sizeof(*f), GFP_KERNEL);
2902         if (!f)
2903                 return NULL;
2904
2905         f->leave = mlxsw_sp_vport_rif_sp_leave;
2906         f->ref_count = 0;
2907         f->dev = l3_dev;
2908         f->fid = fid;
2909
2910         return f;
2911 }
2912
2913 static struct mlxsw_sp_rif *
2914 mlxsw_sp_rif_alloc(u16 rif_index, u16 vr_id, struct net_device *l3_dev,
2915                    struct mlxsw_sp_fid *f)
2916 {
2917         struct mlxsw_sp_rif *rif;
2918
2919         rif = kzalloc(sizeof(*rif), GFP_KERNEL);
2920         if (!rif)
2921                 return NULL;
2922
2923         INIT_LIST_HEAD(&rif->nexthop_list);
2924         INIT_LIST_HEAD(&rif->neigh_list);
2925         ether_addr_copy(rif->addr, l3_dev->dev_addr);
2926         rif->mtu = l3_dev->mtu;
2927         rif->vr_id = vr_id;
2928         rif->dev = l3_dev;
2929         rif->rif_index = rif_index;
2930         rif->f = f;
2931
2932         return rif;
2933 }
2934
2935 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
2936 {
2937         return rif->rif_index;
2938 }
2939
2940 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
2941 {
2942         return rif->dev->ifindex;
2943 }
2944
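/* Sub-port RIF creation sequence: reserve a RIF index, bind a virtual
 * router, program the RIF, install an FDB entry for the router MAC and
 * only then allocate the software state. The optional egress counter is
 * best-effort and its failure is merely logged. Errors unwind in
 * reverse order.
 */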
2945 static struct mlxsw_sp_rif *
2946 mlxsw_sp_vport_rif_sp_create(struct mlxsw_sp_port *mlxsw_sp_vport,
2947                              struct net_device *l3_dev)
2948 {
2949         struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
2950         u32 tb_id = l3mdev_fib_table(l3_dev);
2951         struct mlxsw_sp_vr *vr;
2952         struct mlxsw_sp_fid *f;
2953         struct mlxsw_sp_rif *rif;
2954         u16 fid, rif_index;
2955         int err;
2956
2957         rif_index = mlxsw_sp_avail_rif_get(mlxsw_sp);
2958         if (rif_index == MLXSW_SP_INVALID_INDEX_RIF)
2959                 return ERR_PTR(-ERANGE);
2960
2961         vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN);
2962         if (IS_ERR(vr))
2963                 return ERR_CAST(vr);
2964
2965         err = mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, vr->id, l3_dev,
2966                                        rif_index, true);
2967         if (err)
2968                 goto err_vport_rif_sp_op;
2969
2970         fid = mlxsw_sp_rif_sp_to_fid(rif_index);
2971         err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, true);
2972         if (err)
2973                 goto err_rif_fdb_op;
2974
2975         f = mlxsw_sp_rfid_alloc(fid, l3_dev);
2976         if (!f) {
2977                 err = -ENOMEM;
2978                 goto err_rfid_alloc;
2979         }
2980
2981         rif = mlxsw_sp_rif_alloc(rif_index, vr->id, l3_dev, f);
2982         if (!rif) {
2983                 err = -ENOMEM;
2984                 goto err_rif_alloc;
2985         }
2986
2987         if (devlink_dpipe_table_counter_enabled(priv_to_devlink(mlxsw_sp->core),
2988                                                 MLXSW_SP_DPIPE_TABLE_NAME_ERIF)) {
2989                 err = mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif,
2990                                                  MLXSW_SP_RIF_COUNTER_EGRESS);
2991                 if (err)
2992                         netdev_dbg(mlxsw_sp_vport->dev,
2993                                    "Counter alloc failed, err=%d\n", err);
2994         }
2995
2996         f->rif = rif;
2997         mlxsw_sp->rifs[rif_index] = rif;
2998         vr->rif_count++;
2999
3000         return rif;
3001
3002 err_rif_alloc:
3003         kfree(f);
3004 err_rfid_alloc:
3005         mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false);
3006 err_rif_fdb_op:
3007         mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, vr->id, l3_dev, rif_index,
3008                                  false);
3009 err_vport_rif_sp_op:
3010         mlxsw_sp_vr_put(vr);
3011         return ERR_PTR(err);
3012 }
3013
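/* Undo mlxsw_sp_vport_rif_sp_create(): flush router state referencing
 * the RIF, free its counters and software state, then remove the FDB
 * entry and the RIF itself, and drop the virtual router reference.
 */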
3014 static void mlxsw_sp_vport_rif_sp_destroy(struct mlxsw_sp_port *mlxsw_sp_vport,
3015                                           struct mlxsw_sp_rif *rif)
3016 {
3017         struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
3018         struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[rif->vr_id];
3019         struct net_device *l3_dev = rif->dev;
3020         struct mlxsw_sp_fid *f = rif->f;
3021         u16 rif_index = rif->rif_index;
3022         u16 fid = f->fid;
3023
3024         mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
3025
3026         mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
3027         mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_INGRESS);
3028
3029         vr->rif_count--;
3030         mlxsw_sp->rifs[rif_index] = NULL;
3031         f->rif = NULL;
3032
3033         kfree(rif);
3034
3035         kfree(f);
3036
3037         mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false);
3038
3039         mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, vr->id, l3_dev, rif_index,
3040                                  false);
3041         mlxsw_sp_vr_put(vr);
3042 }
3043
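/* Join a vPort to the RIF of @l3_dev, creating the RIF on first use.
 * The rFID is reference counted; the last leave destroys the RIF.
 */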
3044 static int mlxsw_sp_vport_rif_sp_join(struct mlxsw_sp_port *mlxsw_sp_vport,
3045                                       struct net_device *l3_dev)
3046 {
3047         struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
3048         struct mlxsw_sp_rif *rif;
3049
3050         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
3051         if (!rif) {
3052                 rif = mlxsw_sp_vport_rif_sp_create(mlxsw_sp_vport, l3_dev);
3053                 if (IS_ERR(rif))
3054                         return PTR_ERR(rif);
3055         }
3056
3057         mlxsw_sp_vport_fid_set(mlxsw_sp_vport, rif->f);
3058         rif->f->ref_count++;
3059
3060         netdev_dbg(mlxsw_sp_vport->dev, "Joined FID=%d\n", rif->f->fid);
3061
3062         return 0;
3063 }
3064
3065 static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport)
3066 {
3067         struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport);
3068
3069         netdev_dbg(mlxsw_sp_vport->dev, "Left FID=%d\n", f->fid);
3070
3071         mlxsw_sp_vport_fid_set(mlxsw_sp_vport, NULL);
3072         if (--f->ref_count == 0)
3073                 mlxsw_sp_vport_rif_sp_destroy(mlxsw_sp_vport, f->rif);
3074 }
3075
3076 static int mlxsw_sp_inetaddr_vport_event(struct net_device *l3_dev,
3077                                          struct net_device *port_dev,
3078                                          unsigned long event, u16 vid)
3079 {
3080         struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
3081         struct mlxsw_sp_port *mlxsw_sp_vport;
3082
3083         mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid);
3084         if (WARN_ON(!mlxsw_sp_vport))
3085                 return -EINVAL;
3086
3087         switch (event) {
3088         case NETDEV_UP:
3089                 return mlxsw_sp_vport_rif_sp_join(mlxsw_sp_vport, l3_dev);
3090         case NETDEV_DOWN:
3091                 mlxsw_sp_vport_rif_sp_leave(mlxsw_sp_vport);
3092                 break;
3093         }
3094
3095         return 0;
3096 }
3097
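/* Ports enslaved to a bridge, LAG or OVS do not get a RIF of their own;
 * their IP configuration is handled via the upper device. Otherwise,
 * apply the event to the port's VLAN 1 vPort.
 */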
3098 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
3099                                         unsigned long event)
3100 {
3101         if (netif_is_bridge_port(port_dev) ||
3102             netif_is_lag_port(port_dev) ||
3103             netif_is_ovs_port(port_dev))
3104                 return 0;
3105
3106         return mlxsw_sp_inetaddr_vport_event(port_dev, port_dev, event, 1);
3107 }
3108
3109 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
3110                                          struct net_device *lag_dev,
3111                                          unsigned long event, u16 vid)
3112 {
3113         struct net_device *port_dev;
3114         struct list_head *iter;
3115         int err;
3116
3117         netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
3118                 if (mlxsw_sp_port_dev_check(port_dev)) {
3119                         err = mlxsw_sp_inetaddr_vport_event(l3_dev, port_dev,
3120                                                             event, vid);
3121                         if (err)
3122                                 return err;
3123                 }
3124         }
3125
3126         return 0;
3127 }
3128
3129 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
3130                                        unsigned long event)
3131 {
3132         if (netif_is_bridge_port(lag_dev))
3133                 return 0;
3134
3135         return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1);
3136 }
3137
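/* Map an L3 netdev to its FID: VLAN uppers of the VLAN-aware bridge use
 * the VLAN ID as FID, the VLAN-aware bridge itself uses FID 1, and
 * VLAN-unaware bridges are backed by a vFID.
 */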
3138 static struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp,
3139                                                     struct net_device *l3_dev)
3140 {
3141         u16 fid;
3142
3143         if (is_vlan_dev(l3_dev))
3144                 fid = vlan_dev_vlan_id(l3_dev);
3145         else if (mlxsw_sp->master_bridge.dev == l3_dev)
3146                 fid = 1;
3147         else
3148                 return mlxsw_sp_vfid_find(mlxsw_sp, l3_dev);
3149
3150         return mlxsw_sp_fid_find(mlxsw_sp, fid);
3151 }
3152
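/* The router port is a virtual port one past the last possible front
 * panel port. It is the member (de)registered in a FID's broadcast
 * flood table so that flooded traffic also reaches the router.
 */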
3153 static u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
3154 {
3155         return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
3156 }
3157
3158 static enum mlxsw_flood_table_type mlxsw_sp_flood_table_type_get(u16 fid)
3159 {
3160         return mlxsw_sp_fid_is_vfid(fid) ? MLXSW_REG_SFGC_TABLE_TYPE_FID :
3161                MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST;
3162 }
3163
3164 static u16 mlxsw_sp_flood_table_index_get(u16 fid)
3165 {
3166         return mlxsw_sp_fid_is_vfid(fid) ? mlxsw_sp_fid_to_vfid(fid) : fid;
3167 }
3168
3169 static int mlxsw_sp_router_port_flood_set(struct mlxsw_sp *mlxsw_sp, u16 fid,
3170                                           bool set)
3171 {
3172         u8 router_port = mlxsw_sp_router_port(mlxsw_sp);
3173         enum mlxsw_flood_table_type table_type;
3174         char *sftr_pl;
3175         u16 index;
3176         int err;
3177
3178         sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL);
3179         if (!sftr_pl)
3180                 return -ENOMEM;
3181
3182         table_type = mlxsw_sp_flood_table_type_get(fid);
3183         index = mlxsw_sp_flood_table_index_get(fid);
3184         mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BC, index, table_type,
3185                             1, router_port, set);
3186         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl);
3187
3188         kfree(sftr_pl);
3189         return err;
3190 }
3191
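/* vFIDs are backed by FID interfaces, regular FIDs by VLAN interfaces. */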
3192 static enum mlxsw_reg_ritr_if_type mlxsw_sp_rif_type_get(u16 fid)
3193 {
3194         if (mlxsw_sp_fid_is_vfid(fid))
3195                 return MLXSW_REG_RITR_FID_IF;
3196         else
3197                 return MLXSW_REG_RITR_VLAN_IF;
3198 }
3199
3200 static int mlxsw_sp_rif_bridge_op(struct mlxsw_sp *mlxsw_sp, u16 vr_id,
3201                                   struct net_device *l3_dev,
3202                                   u16 fid, u16 rif,
3203                                   bool create)
3204 {
3205         enum mlxsw_reg_ritr_if_type rif_type;
3206         char ritr_pl[MLXSW_REG_RITR_LEN];
3207
3208         rif_type = mlxsw_sp_rif_type_get(fid);
3209         mlxsw_reg_ritr_pack(ritr_pl, create, rif_type, rif, vr_id, l3_dev->mtu,
3210                             l3_dev->dev_addr);
3211         mlxsw_reg_ritr_fid_set(ritr_pl, rif_type, fid);
3212
3213         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
3214 }
3215
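/* Bridge RIF creation sequence: reserve a RIF index, bind a virtual
 * router, add the router port to the FID's flood table, program the
 * RIF, install an FDB entry for the router MAC and allocate the
 * software state. Errors unwind in reverse order.
 */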
3216 static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp,
3217                                       struct net_device *l3_dev,
3218                                       struct mlxsw_sp_fid *f)
3219 {
3220         u32 tb_id = l3mdev_fib_table(l3_dev);
3221         struct mlxsw_sp_rif *rif;
3222         struct mlxsw_sp_vr *vr;
3223         u16 rif_index;
3224         int err;
3225
3226         rif_index = mlxsw_sp_avail_rif_get(mlxsw_sp);
3227         if (rif_index == MLXSW_SP_INVALID_INDEX_RIF)
3228                 return -ERANGE;
3229
3230         vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN);
3231         if (IS_ERR(vr))
3232                 return PTR_ERR(vr);
3233
3234         err = mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, true);
3235         if (err)
3236                 goto err_port_flood_set;
3237
3238         err = mlxsw_sp_rif_bridge_op(mlxsw_sp, vr->id, l3_dev, f->fid,
3239                                      rif_index, true);
3240         if (err)
3241                 goto err_rif_bridge_op;
3242
3243         err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, true);
3244         if (err)
3245                 goto err_rif_fdb_op;
3246
3247         rif = mlxsw_sp_rif_alloc(rif_index, vr->id, l3_dev, f);
3248         if (!rif) {
3249                 err = -ENOMEM;
3250                 goto err_rif_alloc;
3251         }
3252
3253         f->rif = rif;
3254         mlxsw_sp->rifs[rif_index] = rif;
3255         vr->rif_count++;
3256
3257         netdev_dbg(l3_dev, "RIF=%d created\n", rif_index);
3258
3259         return 0;
3260
3261 err_rif_alloc:
3262         mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false);
3263 err_rif_fdb_op:
3264         mlxsw_sp_rif_bridge_op(mlxsw_sp, vr->id, l3_dev, f->fid, rif_index,
3265                                false);
3266 err_rif_bridge_op:
3267         mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false);
3268 err_port_flood_set:
3269         mlxsw_sp_vr_put(vr);
3270         return err;
3271 }
3272
3273 void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp,
3274                                  struct mlxsw_sp_rif *rif)
3275 {
3276         struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[rif->vr_id];
3277         struct net_device *l3_dev = rif->dev;
3278         struct mlxsw_sp_fid *f = rif->f;
3279         u16 rif_index = rif->rif_index;
3280
3281         mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
3282
3283         vr->rif_count--;
3284         mlxsw_sp->rifs[rif_index] = NULL;
3285         f->rif = NULL;
3286
3287         kfree(rif);
3288
3289         mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false);
3290
3291         mlxsw_sp_rif_bridge_op(mlxsw_sp, vr->id, l3_dev, f->fid, rif_index,
3292                                false);
3293
3294         mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false);
3295
3296         mlxsw_sp_vr_put(vr);
3297
3298         netdev_dbg(l3_dev, "RIF=%d destroyed\n", rif_index);
3299 }
3300
3301 static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
3302                                           struct net_device *br_dev,
3303                                           unsigned long event)
3304 {
3305         struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
3306         struct mlxsw_sp_fid *f;
3307
3308         /* FID can either be an actual FID if the L3 device is the
3309          * VLAN-aware bridge or a VLAN device on top. Otherwise, the
3310          * L3 device is a VLAN-unaware bridge and we get a vFID.
3311          */
3312         f = mlxsw_sp_bridge_fid_get(mlxsw_sp, l3_dev);
3313         if (WARN_ON(!f))
3314                 return -EINVAL;
3315
3316         switch (event) {
3317         case NETDEV_UP:
3318                 return mlxsw_sp_rif_bridge_create(mlxsw_sp, l3_dev, f);
3319         case NETDEV_DOWN:
3320                 mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->rif);
3321                 break;
3322         }
3323
3324         return 0;
3325 }
3326
3327 static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
3328                                         unsigned long event)
3329 {
3330         struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
3331         struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(vlan_dev);
3332         u16 vid = vlan_dev_vlan_id(vlan_dev);
3333
3334         if (mlxsw_sp_port_dev_check(real_dev))
3335                 return mlxsw_sp_inetaddr_vport_event(vlan_dev, real_dev, event,
3336                                                      vid);
3337         else if (netif_is_lag_master(real_dev))
3338                 return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
3339                                                      vid);
3340         else if (netif_is_bridge_master(real_dev) &&
3341                  mlxsw_sp->master_bridge.dev == real_dev)
3342                 return mlxsw_sp_inetaddr_bridge_event(vlan_dev, real_dev,
3343                                                       event);
3344
3345         return 0;
3346 }
3347
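/* Dispatch an inetaddr event according to the netdev type. Events on
 * netdevs we do not offload are silently ignored.
 */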
3348 static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
3349                                      unsigned long event)
3350 {
3351         if (mlxsw_sp_port_dev_check(dev))
3352                 return mlxsw_sp_inetaddr_port_event(dev, event);
3353         else if (netif_is_lag_master(dev))
3354                 return mlxsw_sp_inetaddr_lag_event(dev, event);
3355         else if (netif_is_bridge_master(dev))
3356                 return mlxsw_sp_inetaddr_bridge_event(dev, dev, event);
3357         else if (is_vlan_dev(dev))
3358                 return mlxsw_sp_inetaddr_vlan_event(dev, event);
3359         else
3360                 return 0;
3361 }
3362
3363 int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
3364                             unsigned long event, void *ptr)
3365 {
3366         struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
3367         struct net_device *dev = ifa->ifa_dev->dev;
3368         struct mlxsw_sp *mlxsw_sp;
3369         struct mlxsw_sp_rif *rif;
3370         int err = 0;
3371
3372         mlxsw_sp = mlxsw_sp_lower_get(dev);
3373         if (!mlxsw_sp)
3374                 goto out;
3375
3376         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3377         if (!mlxsw_sp_rif_should_config(rif, ifa->ifa_dev, event))
3378                 goto out;
3379
3380         err = __mlxsw_sp_inetaddr_event(dev, event);
3381 out:
3382         return notifier_from_errno(err);
3383 }
3384
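/* Re-program an existing RIF with a new MAC address and MTU. The RITR
 * register is queried first so the rest of the RIF's configuration is
 * preserved.
 */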
3385 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
3386                              const char *mac, int mtu)
3387 {
3388         char ritr_pl[MLXSW_REG_RITR_LEN];
3389         int err;
3390
3391         mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
3392         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
3393         if (err)
3394                 return err;
3395
3396         mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
3397         mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
3398         mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
3399         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
3400 }
3401
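/* Handle a MAC or MTU change on a RIF-backed netdev: replace the router
 * MAC's FDB entry and re-program the RIF, rolling back to the old
 * address and MTU on failure.
 */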
3402 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
3403 {
3404         struct mlxsw_sp *mlxsw_sp;
3405         struct mlxsw_sp_rif *rif;
3406         int err;
3407
3408         mlxsw_sp = mlxsw_sp_lower_get(dev);
3409         if (!mlxsw_sp)
3410                 return 0;
3411
3412         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3413         if (!rif)
3414                 return 0;
3415
3416         err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, rif->f->fid, false);
3417         if (err)
3418                 return err;
3419
3420         err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
3421                                 dev->mtu);
3422         if (err)
3423                 goto err_rif_edit;
3424
3425         err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, rif->f->fid, true);
3426         if (err)
3427                 goto err_rif_fdb_op;
3428
3429         ether_addr_copy(rif->addr, dev->dev_addr);
3430         rif->mtu = dev->mtu;
3431
3432         netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
3433
3434         return 0;
3435
3436 err_rif_fdb_op:
3437         mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
3438 err_rif_edit:
3439         mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, rif->f->fid, true);
3440         return err;
3441 }
3442
3443 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
3444                                   struct net_device *l3_dev)
3445 {
3446         struct mlxsw_sp_rif *rif;
3447
3448         /* If netdev is already associated with a RIF, then we need to
3449          * destroy it and create a new one with the new virtual router ID.
3450          */
3451         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
3452         if (rif)
3453                 __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN);
3454
3455         return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP);
3456 }
3457
3458 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
3459                                     struct net_device *l3_dev)
3460 {
3461         struct mlxsw_sp_rif *rif;
3462
3463         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
3464         if (!rif)
3465                 return;
3466         __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN);
3467 }
3468
3469 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
3470                                  struct netdev_notifier_changeupper_info *info)
3471 {
3472         struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
3473         int err = 0;
3474
3475         if (!mlxsw_sp)
3476                 return 0;
3477
3478         switch (event) {
3479         case NETDEV_PRECHANGEUPPER:
3480                 return 0;
3481         case NETDEV_CHANGEUPPER:
3482                 if (info->linking)
3483                         err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev);
3484                 else
3485                         mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
3486                 break;
3487         }
3488
3489         return err;
3490 }
3491
3492 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
3493 {
3494         struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
3495
3496         /* Flush pending FIB notifications and then flush the device's
3497          * table before requesting another dump. The FIB notification
3498          * block is unregistered, so no need to take RTNL.
3499          */
3500         mlxsw_core_flush_owq();
3501         mlxsw_sp_router_fib_flush(mlxsw_sp);
3502 }
3503
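/* Allocate the RIF array according to the MAX_RIFS resource and enable
 * the router in the device via RGCR.
 */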
3504 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
3505 {
3506         char rgcr_pl[MLXSW_REG_RGCR_LEN];
3507         u64 max_rifs;
3508         int err;
3509
3510         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
3511                 return -EIO;
3512
3513         max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
3514         mlxsw_sp->rifs = kcalloc(max_rifs, sizeof(struct mlxsw_sp_rif *),
3515                                  GFP_KERNEL);
3516         if (!mlxsw_sp->rifs)
3517                 return -ENOMEM;
3518
3519         mlxsw_reg_rgcr_pack(rgcr_pl, true);
3520         mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
3521         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
3522         if (err)
3523                 goto err_rgcr_fail;
3524
3525         return 0;
3526
3527 err_rgcr_fail:
3528         kfree(mlxsw_sp->rifs);
3529         return err;
3530 }
3531
3532 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
3533 {
3534         char rgcr_pl[MLXSW_REG_RGCR_LEN];
3535         int i;
3536
3537         mlxsw_reg_rgcr_pack(rgcr_pl, false);
3538         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
3539
3540         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
3541                 WARN_ON_ONCE(mlxsw_sp->rifs[i]);
3542
3543         kfree(mlxsw_sp->rifs);
3544 }
3545
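/* Bring up the routing subsystems in dependency order, registering the
 * FIB notifier last so that notifications only arrive once everything
 * is ready. mlxsw_sp_router_fini() tears down in reverse order.
 */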
3546 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
3547 {
3548         int err;
3549
3550         INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list);
3551         err = __mlxsw_sp_router_init(mlxsw_sp);
3552         if (err)
3553                 return err;
3554
3555         err = rhashtable_init(&mlxsw_sp->router.nexthop_ht,
3556                               &mlxsw_sp_nexthop_ht_params);
3557         if (err)
3558                 goto err_nexthop_ht_init;
3559
3560         err = rhashtable_init(&mlxsw_sp->router.nexthop_group_ht,
3561                               &mlxsw_sp_nexthop_group_ht_params);
3562         if (err)
3563                 goto err_nexthop_group_ht_init;
3564
3565         err = mlxsw_sp_lpm_init(mlxsw_sp);
3566         if (err)
3567                 goto err_lpm_init;
3568
3569         err = mlxsw_sp_vrs_init(mlxsw_sp);
3570         if (err)
3571                 goto err_vrs_init;
3572
3573         err = mlxsw_sp_neigh_init(mlxsw_sp);
3574         if (err)
3575                 goto err_neigh_init;
3576
3577         mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
3578         err = register_fib_notifier(&mlxsw_sp->fib_nb,
3579                                     mlxsw_sp_router_fib_dump_flush);
3580         if (err)
3581                 goto err_register_fib_notifier;
3582
3583         return 0;
3584
3585 err_register_fib_notifier:
3586         mlxsw_sp_neigh_fini(mlxsw_sp);
3587 err_neigh_init:
3588         mlxsw_sp_vrs_fini(mlxsw_sp);
3589 err_vrs_init:
3590         mlxsw_sp_lpm_fini(mlxsw_sp);
3591 err_lpm_init:
3592         rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
3593 err_nexthop_group_ht_init:
3594         rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
3595 err_nexthop_ht_init:
3596         __mlxsw_sp_router_fini(mlxsw_sp);
3597         return err;
3598 }
3599
3600 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
3601 {
3602         unregister_fib_notifier(&mlxsw_sp->fib_nb);
3603         mlxsw_sp_neigh_fini(mlxsw_sp);
3604         mlxsw_sp_vrs_fini(mlxsw_sp);
3605         mlxsw_sp_lpm_fini(mlxsw_sp);
3606         rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
3607         rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
3608         __mlxsw_sp_router_fini(mlxsw_sp);
3609 }