]> git.karo-electronics.de Git - karo-tx-linux.git/blob - drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
Merge branch 'work.splice' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
[karo-tx-linux.git] / drivers / net / ethernet / mellanox / mlxsw / spectrum_router.c
1 /*
2  * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3  * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
4  * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
5  * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
6  * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the names of the copyright holders nor the names of its
17  *    contributors may be used to endorse or promote products derived from
18  *    this software without specific prior written permission.
19  *
20  * Alternatively, this software may be distributed under the terms of the
21  * GNU General Public License ("GPL") version 2 as published by the Free
22  * Software Foundation.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36
37 #include <linux/kernel.h>
38 #include <linux/types.h>
39 #include <linux/rhashtable.h>
40 #include <linux/bitops.h>
41 #include <linux/in6.h>
42 #include <linux/notifier.h>
43 #include <linux/inetdevice.h>
44 #include <net/netevent.h>
45 #include <net/neighbour.h>
46 #include <net/arp.h>
47 #include <net/ip_fib.h>
48
49 #include "spectrum.h"
50 #include "core.h"
51 #include "reg.h"
52
/* Iterate over every prefix length marked as used in @prefix_usage. */
#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
55
56 static bool
57 mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1,
58                              struct mlxsw_sp_prefix_usage *prefix_usage2)
59 {
60         unsigned char prefix;
61
62         mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) {
63                 if (!test_bit(prefix, prefix_usage2->b))
64                         return false;
65         }
66         return true;
67 }
68
69 static bool
70 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
71                          struct mlxsw_sp_prefix_usage *prefix_usage2)
72 {
73         return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
74 }
75
76 static bool
77 mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
78 {
79         struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } };
80
81         return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
82 }
83
/* Copy the set of used prefix lengths from @prefix_usage2 into
 * @prefix_usage1.
 */
static void
mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
			  struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}
90
/* Mark all prefix lengths as unused. */
static void
mlxsw_sp_prefix_usage_zero(struct mlxsw_sp_prefix_usage *prefix_usage)
{
	memset(prefix_usage, 0, sizeof(*prefix_usage));
}
96
/* Mark @prefix_len as used in @prefix_usage. */
static void
mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
			  unsigned char prefix_len)
{
	set_bit(prefix_len, prefix_usage->b);
}
103
/* Mark @prefix_len as unused in @prefix_usage. */
static void
mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
			    unsigned char prefix_len)
{
	clear_bit(prefix_len, prefix_usage->b);
}
110
/* Hash-table key of a FIB node: the route prefix and its length.
 * The address buffer is sized for IPv6, the larger of the two
 * address families.
 */
struct mlxsw_sp_fib_key {
	unsigned char addr[sizeof(struct in6_addr)];
	unsigned char prefix_len;
};
115
/* Type of a FIB entry; selects how the device treats matching packets. */
enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
};
121
122 struct mlxsw_sp_nexthop_group;
123
/* One route prefix inside a virtual router's FIB.  A node groups all
 * entries sharing the same key.
 */
struct mlxsw_sp_fib_node {
	struct list_head entry_list;	/* FIB entries for this prefix */
	struct list_head list;		/* member of fib->node_list */
	struct rhash_head ht_node;	/* node in fib->ht, keyed by 'key' */
	struct mlxsw_sp_vr *vr;		/* owning virtual router */
	struct mlxsw_sp_fib_key key;
};
131
/* Per-entry route parameters; presumably mirrors the kernel FIB route
 * attributes (table id, metric, TOS, route type) — confirm against the
 * FIB notifier handler.
 */
struct mlxsw_sp_fib_entry_params {
	u32 tb_id;
	u32 prio;
	u8 tos;
	u8 type;
};
138
/* A single route offloaded (or pending offload) to the device. */
struct mlxsw_sp_fib_entry {
	struct list_head list;			/* member of fib_node->entry_list */
	struct mlxsw_sp_fib_node *fib_node;	/* owning prefix node */
	enum mlxsw_sp_fib_entry_type type;
	struct list_head nexthop_group_node;	/* linkage within nh_group */
	struct mlxsw_sp_nexthop_group *nh_group;
	struct mlxsw_sp_fib_entry_params params;
	bool offloaded;				/* currently programmed in HW? */
};
148
/* A FIB table belonging to one virtual router. */
struct mlxsw_sp_fib {
	struct rhashtable ht;		/* fib nodes hashed by their key */
	struct list_head node_list;	/* all fib nodes in this table */
	/* Per-prefix-length reference counts; prefix_usage summarizes
	 * which lengths are in use (maintained by code outside this view).
	 */
	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
	struct mlxsw_sp_prefix_usage prefix_usage;
};
155
156 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
157
158 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void)
159 {
160         struct mlxsw_sp_fib *fib;
161         int err;
162
163         fib = kzalloc(sizeof(*fib), GFP_KERNEL);
164         if (!fib)
165                 return ERR_PTR(-ENOMEM);
166         err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
167         if (err)
168                 goto err_rhashtable_init;
169         INIT_LIST_HEAD(&fib->node_list);
170         return fib;
171
172 err_rhashtable_init:
173         kfree(fib);
174         return ERR_PTR(err);
175 }
176
/* Free a FIB table.  The table must already be empty; a non-empty
 * node_list indicates a leak and triggers a warning.
 */
static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
{
	WARN_ON(!list_empty(&fib->node_list));
	rhashtable_destroy(&fib->ht);
	kfree(fib);
}
183
184 static struct mlxsw_sp_lpm_tree *
185 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp, bool one_reserved)
186 {
187         static struct mlxsw_sp_lpm_tree *lpm_tree;
188         int i;
189
190         for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
191                 lpm_tree = &mlxsw_sp->router.lpm_trees[i];
192                 if (lpm_tree->ref_count == 0) {
193                         if (one_reserved)
194                                 one_reserved = false;
195                         else
196                                 return lpm_tree;
197                 }
198         }
199         return NULL;
200 }
201
/* Allocate @lpm_tree's tree id in the device via the RALTA register. */
static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, true,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}
212
/* Release @lpm_tree's tree id in the device (RALTA with the allocate
 * flag cleared).
 */
static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, false,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}
223
/* Program the structure of @lpm_tree (RALST register) so it has a bin
 * for each prefix length in @prefix_usage.
 *
 * for_each_set_bit() iterates in ascending order, so after the first
 * loop root_bin holds the longest used prefix length.  The second loop
 * then packs one bin per used length (skipping length 0, which needs
 * no bin), linking each bin to the previously packed one via
 * last_prefix; the first bin gets RALST_BIN_NO_CHILD.
 */
static int
mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_prefix_usage *prefix_usage,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralst_pl[MLXSW_REG_RALST_LEN];
	u8 root_bin = 0;
	u8 prefix;
	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;

	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
		root_bin = prefix;

	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
		if (prefix == 0)
			continue;
		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
					 MLXSW_REG_RALST_BIN_NO_CHILD);
		last_prefix = prefix;
	}
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
}
247
/* Grab an unused LPM tree slot, register it with the device and
 * program its structure for @prefix_usage.  On success the tree
 * records its prefix usage; the reference count is managed by the
 * caller (mlxsw_sp_lpm_tree_get()).
 */
static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
			 struct mlxsw_sp_prefix_usage *prefix_usage,
			 enum mlxsw_sp_l3proto proto, bool one_reserved)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp, one_reserved);
	if (!lpm_tree)
		return ERR_PTR(-EBUSY);
	lpm_tree->proto = proto;
	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
	if (err)
		return ERR_PTR(err);

	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
						lpm_tree);
	if (err)
		goto err_left_struct_set;
	memcpy(&lpm_tree->prefix_usage, prefix_usage,
	       sizeof(lpm_tree->prefix_usage));
	return lpm_tree;

err_left_struct_set:
	/* Undo the device-side allocation done above. */
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
	return ERR_PTR(err);
}
276
/* Tear down an LPM tree; currently just releases it in the device. */
static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_lpm_tree *lpm_tree)
{
	return mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}
282
/* Get a reference on an LPM tree matching @proto and exactly
 * @prefix_usage, reusing an existing tree when possible and creating
 * a new one otherwise.  @one_reserved is passed through to creation
 * to keep one spare tree unallocated.
 */
static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
		      struct mlxsw_sp_prefix_usage *prefix_usage,
		      enum mlxsw_sp_l3proto proto, bool one_reserved)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
		if (lpm_tree->ref_count != 0 &&
		    lpm_tree->proto == proto &&
		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
					     prefix_usage))
			goto inc_ref_count;
	}
	lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage,
					    proto, one_reserved);
	if (IS_ERR(lpm_tree))
		return lpm_tree;

inc_ref_count:
	lpm_tree->ref_count++;
	return lpm_tree;
}
308
309 static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
310                                  struct mlxsw_sp_lpm_tree *lpm_tree)
311 {
312         if (--lpm_tree->ref_count == 0)
313                 return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
314         return 0;
315 }
316
317 static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
318 {
319         struct mlxsw_sp_lpm_tree *lpm_tree;
320         int i;
321
322         for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
323                 lpm_tree = &mlxsw_sp->router.lpm_trees[i];
324                 lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
325         }
326 }
327
328 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
329 {
330         struct mlxsw_sp_vr *vr;
331         int i;
332
333         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
334                 vr = &mlxsw_sp->router.vrs[i];
335                 if (!vr->used)
336                         return vr;
337         }
338         return NULL;
339 }
340
/* Bind @vr to its current LPM tree in the device (RALTB register). */
static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_vr *vr)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	mlxsw_reg_raltb_pack(raltb_pl, vr->id,
			     (enum mlxsw_reg_ralxx_protocol) vr->proto,
			     vr->lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}
351
/* Detach @vr from its LPM tree by re-binding it to tree 0. */
static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_vr *vr)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	/* Bind to tree 0 which is default */
	mlxsw_reg_raltb_pack(raltb_pl, vr->id,
			     (enum mlxsw_reg_ralxx_protocol) vr->proto, 0);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}
362
363 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
364 {
365         /* For our purpose, squash main and local table into one */
366         if (tb_id == RT_TABLE_LOCAL)
367                 tb_id = RT_TABLE_MAIN;
368         return tb_id;
369 }
370
371 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
372                                             u32 tb_id,
373                                             enum mlxsw_sp_l3proto proto)
374 {
375         struct mlxsw_sp_vr *vr;
376         int i;
377
378         tb_id = mlxsw_sp_fix_tb_id(tb_id);
379
380         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
381                 vr = &mlxsw_sp->router.vrs[i];
382                 if (vr->used && vr->proto == proto && vr->tb_id == tb_id)
383                         return vr;
384         }
385         return NULL;
386 }
387
/* Set up a new virtual router for (@tb_id, @proto): allocate a free VR
 * slot, create its FIB table, get an LPM tree covering @prefix_len
 * (keeping one tree in reserve) and bind the VR to that tree in the
 * device.  Error paths unwind in reverse order of acquisition.
 */
static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
					      unsigned char prefix_len,
					      u32 tb_id,
					      enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
	if (!vr)
		return ERR_PTR(-EBUSY);
	vr->fib = mlxsw_sp_fib_create();
	if (IS_ERR(vr->fib))
		return ERR_CAST(vr->fib);

	vr->proto = proto;
	vr->tb_id = tb_id;
	/* Request a tree that covers exactly the prefix length needed. */
	mlxsw_sp_prefix_usage_zero(&req_prefix_usage);
	mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 proto, true);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_tree_get;
	}
	vr->lpm_tree = lpm_tree;
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
	if (err)
		goto err_tree_bind;

	vr->used = true;
	return vr;

err_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
err_tree_get:
	mlxsw_sp_fib_destroy(vr->fib);

	return ERR_PTR(err);
}
430
/* Tear down @vr: unbind it from its LPM tree in the device, drop the
 * tree reference, free the FIB table and return the slot to the pool.
 */
static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_vr *vr)
{
	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
	mlxsw_sp_fib_destroy(vr->fib);
	vr->used = false;
}
439
/* Make sure @vr's LPM tree can serve @req_prefix_usage, replacing the
 * tree if needed.  The new tree is bound before the old one is put so
 * the VR is never left unbound (avoiding packet loss); on bind failure
 * the old tree is restored.
 */
static int
mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
			   struct mlxsw_sp_prefix_usage *req_prefix_usage)
{
	struct mlxsw_sp_lpm_tree *lpm_tree = vr->lpm_tree;
	struct mlxsw_sp_lpm_tree *new_tree;
	int err;

	if (mlxsw_sp_prefix_usage_eq(req_prefix_usage, &lpm_tree->prefix_usage))
		return 0;

	new_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage,
					 vr->proto, false);
	if (IS_ERR(new_tree)) {
		/* We failed to get a tree according to the required
		 * prefix usage. However, the current tree might be still good
		 * for us if our requirement is subset of the prefixes used
		 * in the tree.
		 */
		if (mlxsw_sp_prefix_usage_subset(req_prefix_usage,
						 &lpm_tree->prefix_usage))
			return 0;
		return PTR_ERR(new_tree);
	}

	/* Prevent packet loss by overwriting existing binding */
	vr->lpm_tree = new_tree;
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
	if (err)
		goto err_tree_bind;
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);

	return 0;

err_tree_bind:
	/* Roll back to the previous tree. */
	vr->lpm_tree = lpm_tree;
	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
	return err;
}
479
/* Get the virtual router for (@tb_id, @proto), creating it if it does
 * not exist yet.  For an existing VR, make sure its LPM tree also
 * covers @prefix_len, replacing the tree when a new length is needed.
 */
static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp,
					   unsigned char prefix_len,
					   u32 tb_id,
					   enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_vr *vr;
	int err;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);
	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id, proto);
	if (!vr) {
		vr = mlxsw_sp_vr_create(mlxsw_sp, prefix_len, tb_id, proto);
		if (IS_ERR(vr))
			return vr;
	} else {
		struct mlxsw_sp_prefix_usage req_prefix_usage;

		mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
					  &vr->fib->prefix_usage);
		mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
		/* Need to replace LPM tree in case new prefix is required. */
		err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
						 &req_prefix_usage);
		if (err)
			return ERR_PTR(err);
	}
	return vr;
}
508
/* Release a reference on @vr taken by mlxsw_sp_vr_get(). */
static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
{
	/* Destroy virtual router entity in case the associated FIB is empty
	 * and allow it to be used for other tables in future. Otherwise,
	 * check if some prefix usage did not disappear and change tree if
	 * that is the case. Note that in case new, smaller tree cannot be
	 * allocated, the original one will be kept being used.
	 */
	if (mlxsw_sp_prefix_usage_none(&vr->fib->prefix_usage))
		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
	else
		mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
					   &vr->fib->prefix_usage);
}
523
524 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
525 {
526         struct mlxsw_sp_vr *vr;
527         u64 max_vrs;
528         int i;
529
530         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
531                 return -EIO;
532
533         max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
534         mlxsw_sp->router.vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
535                                        GFP_KERNEL);
536         if (!mlxsw_sp->router.vrs)
537                 return -ENOMEM;
538
539         for (i = 0; i < max_vrs; i++) {
540                 vr = &mlxsw_sp->router.vrs[i];
541                 vr->id = i;
542         }
543
544         return 0;
545 }
546
547 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
548
/* Undo mlxsw_sp_vrs_init(): drain pending FIB work, flush the device's
 * routing tables and free the virtual router array.
 */
static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
{
	/* At this stage we're guaranteed not to have new incoming
	 * FIB notifications and the work queue is free from FIBs
	 * sitting on top of mlxsw netdevs. However, we can still
	 * have other FIBs queued. Flush the queue before flushing
	 * the device's tables. No need for locks, as we're the only
	 * writer.
	 */
	mlxsw_core_flush_owq();
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	kfree(mlxsw_sp->router.vrs);
}
562
/* Hash-table key for neighbour entries: the kernel neighbour pointer. */
struct mlxsw_sp_neigh_key {
	struct neighbour *n;
};
566
/* Driver-side state for one kernel neighbour mirrored to the device. */
struct mlxsw_sp_neigh_entry {
	struct list_head rif_list_node;	/* member of the RIF's neigh_list */
	struct rhash_head ht_node;	/* node in router.neigh_ht */
	struct mlxsw_sp_neigh_key key;
	u16 rif;			/* router interface index */
	bool connected;			/* resolved? unresolved entries get probed */
	unsigned char ha[ETH_ALEN];	/* neighbour's MAC address */
	struct list_head nexthop_list; /* list of nexthops using
					* this neigh entry
					*/
	struct list_head nexthop_neighs_list_node; /* member of router.nexthop_neighs_list */
};
579
/* rhashtable layout for router.neigh_ht: keyed by the full
 * mlxsw_sp_neigh_key embedded in each entry.
 */
static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
	.key_len = sizeof(struct mlxsw_sp_neigh_key),
};
585
586 static struct mlxsw_sp_neigh_entry *
587 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
588                            u16 rif)
589 {
590         struct mlxsw_sp_neigh_entry *neigh_entry;
591
592         neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
593         if (!neigh_entry)
594                 return NULL;
595
596         neigh_entry->key.n = n;
597         neigh_entry->rif = rif;
598         INIT_LIST_HEAD(&neigh_entry->nexthop_list);
599
600         return neigh_entry;
601 }
602
/* Free a neighbour entry allocated by mlxsw_sp_neigh_entry_alloc(). */
static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}
607
/* Add @neigh_entry to the router's neighbour hash table. */
static int
mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht,
				      &neigh_entry->ht_node,
				      mlxsw_sp_neigh_ht_params);
}
616
/* Remove @neigh_entry from the router's neighbour hash table. */
static void
mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht,
			       &neigh_entry->ht_node,
			       mlxsw_sp_neigh_ht_params);
}
625
/* Create and register a neighbour entry for kernel neighbour @n:
 * resolve the RIF from the neighbour's netdev, allocate the entry,
 * insert it into the hash table and link it on the RIF's list.
 * Returns -EINVAL when the netdev has no RIF.
 */
static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_rif *r;
	int err;

	r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
	if (!r)
		return ERR_PTR(-EINVAL);

	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, r->rif);
	if (!neigh_entry)
		return ERR_PTR(-ENOMEM);

	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
	if (err)
		goto err_neigh_entry_insert;

	list_add(&neigh_entry->rif_list_node, &r->neigh_list);

	return neigh_entry;

err_neigh_entry_insert:
	mlxsw_sp_neigh_entry_free(neigh_entry);
	return ERR_PTR(err);
}
653
/* Unlink @neigh_entry from its RIF list and hash table, then free it. */
static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_neigh_entry *neigh_entry)
{
	list_del(&neigh_entry->rif_list_node);
	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_free(neigh_entry);
}
662
663 static struct mlxsw_sp_neigh_entry *
664 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
665 {
666         struct mlxsw_sp_neigh_key key;
667
668         key.n = n;
669         return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht,
670                                       &key, mlxsw_sp_neigh_ht_params);
671 }
672
/* Cache the ARP table's DELAY_PROBE_TIME (in msecs) as the interval
 * for the periodic neighbour-activity update work.
 */
static void
mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);

	mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval);
}
680
/* Process one IPv4 activity entry from a RAUHTD dump: find the kernel
 * neighbour behind the (RIF, DIP) pair and send it an event so the
 * kernel treats it as active.
 */
static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int ent_index)
{
	struct net_device *dev;
	struct neighbour *n;
	__be32 dipn;
	u32 dip;
	u16 rif;

	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);

	if (!mlxsw_sp->rifs[rif]) {
		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
		return;
	}

	/* The device reports the DIP in host order; neigh_lookup()
	 * expects network order.
	 */
	dipn = htonl(dip);
	dev = mlxsw_sp->rifs[rif]->dev;
	n = neigh_lookup(&arp_tbl, &dipn, dev);
	if (!n) {
		netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
			   &dip);
		return;
	}

	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
	neigh_event_send(n, NULL);
	neigh_release(n);
}
711
712 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
713                                                    char *rauhtd_pl,
714                                                    int rec_index)
715 {
716         u8 num_entries;
717         int i;
718
719         num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
720                                                                 rec_index);
721         /* Hardware starts counting at 0, so add 1. */
722         num_entries++;
723
724         /* Each record consists of several neighbour entries. */
725         for (i = 0; i < num_entries; i++) {
726                 int ent_index;
727
728                 ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
729                 mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
730                                                        ent_index);
731         }
732
733 }
734
/* Dispatch one RAUHTD record by type.  Only IPv4 dumps are requested
 * (see mlxsw_sp_router_neighs_update_rauhtd()), so an IPv6 record here
 * is a bug.
 */
static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
					      char *rauhtd_pl, int rec_index)
{
	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
	case MLXSW_REG_RAUHTD_TYPE_IPV4:
		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
						       rec_index);
		break;
	case MLXSW_REG_RAUHTD_TYPE_IPV6:
		WARN_ON_ONCE(1);
		break;
	}
}
748
/* Return true when the RAUHTD response buffer is completely full,
 * meaning the dump may be truncated and another query is needed:
 * the maximum number of records is present and the last record holds
 * the maximum number of entries (IPv6 records count as full).
 */
static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
{
	u8 num_rec, last_rec_index, num_entries;

	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
	last_rec_index = num_rec - 1;

	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
		return false;
	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
	    MLXSW_REG_RAUHTD_TYPE_IPV6)
		return true;

	/* Hardware counts entries from 0, hence the pre-increment. */
	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
								last_rec_index);
	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
		return true;
	return false;
}
768
769 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
770 {
771         char *rauhtd_pl;
772         u8 num_rec;
773         int i, err;
774
775         rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
776         if (!rauhtd_pl)
777                 return -ENOMEM;
778
779         /* Make sure the neighbour's netdev isn't removed in the
780          * process.
781          */
782         rtnl_lock();
783         do {
784                 mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4);
785                 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
786                                       rauhtd_pl);
787                 if (err) {
788                         dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour talbe\n");
789                         break;
790                 }
791                 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
792                 for (i = 0; i < num_rec; i++)
793                         mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
794                                                           i);
795         } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
796         rtnl_unlock();
797
798         kfree(rauhtd_pl);
799         return err;
800 }
801
/* Keep every neighbour that backs a nexthop alive in the kernel,
 * regardless of CPU-visible traffic, by sending it a neighbour event.
 */
static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;

	/* Take RTNL mutex here to prevent lists from changes */
	rtnl_lock();
	list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
			    nexthop_neighs_list_node)
		/* If this neigh have nexthops, make the kernel think this neigh
		 * is active regardless of the traffic.
		 */
		neigh_event_send(neigh_entry->key.n, NULL);
	rtnl_unlock();
}
816
/* Re-arm the periodic neighbour-activity update work using the cached
 * interval (derived from the ARP table's DELAY_PROBE_TIME).
 */
static void
mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval = mlxsw_sp->router.neighs_update.interval;

	mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw,
			       msecs_to_jiffies(interval));
}
825
/* Periodic work: sync neighbour activity from the device to the
 * kernel, keep nexthop neighbours alive, then reschedule itself.
 */
static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
{
	struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
						 router.neighs_update.dw.work);
	int err;

	err = mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp);
	if (err)
		dev_err(mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");

	mlxsw_sp_router_neighs_update_nh(mlxsw_sp);

	/* Always re-arm, even after a dump failure. */
	mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp);
}
840
/* Periodic work: actively probe unresolved nexthop neighbours so they
 * eventually resolve even when all traffic is forwarded in hardware.
 */
static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
						 router.nexthop_probe_dw.work);

	/* Iterate over nexthop neighbours, find those who are unresolved and
	 * send arp on them. This solves the chicken-egg problem when
	 * the nexthop wouldn't get offloaded until the neighbor is resolved
	 * but it wouldn't get resolved ever in case traffic is flowing in HW
	 * using different nexthop.
	 *
	 * Take RTNL mutex here to prevent lists from changes.
	 */
	rtnl_lock();
	list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
			    nexthop_neighs_list_node)
		if (!neigh_entry->connected)
			neigh_event_send(neigh_entry->key.n, NULL);
	rtnl_unlock();

	mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw,
			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
}
865
866 static void
867 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
868                               struct mlxsw_sp_neigh_entry *neigh_entry,
869                               bool removing);
870
871 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
872 {
873         return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
874                         MLXSW_REG_RAUHT_OP_WRITE_DELETE;
875 }
876
877 static void
878 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
879                                 struct mlxsw_sp_neigh_entry *neigh_entry,
880                                 enum mlxsw_reg_rauht_op op)
881 {
882         struct neighbour *n = neigh_entry->key.n;
883         u32 dip = ntohl(*((__be32 *) n->primary_key));
884         char rauht_pl[MLXSW_REG_RAUHT_LEN];
885
886         mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
887                               dip);
888         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
889 }
890
891 static void
892 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
893                             struct mlxsw_sp_neigh_entry *neigh_entry,
894                             bool adding)
895 {
896         if (!adding && !neigh_entry->connected)
897                 return;
898         neigh_entry->connected = adding;
899         if (neigh_entry->key.n->tbl == &arp_tbl)
900                 mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
901                                                 mlxsw_sp_rauht_op(adding));
902         else
903                 WARN_ON_ONCE(1);
904 }
905
/* Deferred-work context for a NETEVENT_NEIGH_UPDATE notification. The
 * notifier runs in atomic context, so the actual processing is punted to
 * process context via this work item.
 */
struct mlxsw_sp_neigh_event_work {
        struct work_struct work;
        struct mlxsw_sp *mlxsw_sp;
        struct neighbour *n; /* holds a reference, released by the work */
};
911
/* Process-context handler for a neighbour update: sync the driver's neigh
 * entry and the device's host table with the kernel's view of the neighbour.
 */
static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
{
        struct mlxsw_sp_neigh_event_work *neigh_work =
                container_of(work, struct mlxsw_sp_neigh_event_work, work);
        struct mlxsw_sp *mlxsw_sp = neigh_work->mlxsw_sp;
        struct mlxsw_sp_neigh_entry *neigh_entry;
        struct neighbour *n = neigh_work->n;
        unsigned char ha[ETH_ALEN];
        bool entry_connected;
        u8 nud_state, dead;

        /* If these parameters are changed after we release the lock,
         * then we are guaranteed to receive another event letting us
         * know about it.
         */
        read_lock_bh(&n->lock);
        memcpy(ha, n->ha, ETH_ALEN);
        nud_state = n->nud_state;
        dead = n->dead;
        read_unlock_bh(&n->lock);

        rtnl_lock();
        entry_connected = nud_state & NUD_VALID && !dead;
        neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
        /* Nothing to do when the neighbour is unusable and we never created
         * an entry for it.
         */
        if (!entry_connected && !neigh_entry)
                goto out;
        if (!neigh_entry) {
                neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
                if (IS_ERR(neigh_entry))
                        goto out;
        }

        memcpy(neigh_entry->ha, ha, ETH_ALEN);
        mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
        /* Refresh offload state of all nexthops using this neighbour. */
        mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);

        /* Drop entries that are no longer programmed nor referenced. */
        if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
                mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);

out:
        rtnl_unlock();
        /* Drop the reference taken by the netevent notifier. */
        neigh_release(n);
        kfree(neigh_work);
}
956
/* Netevent notifier. Runs in atomic context, so neighbour updates are
 * deferred to a work item; DELAY_PROBE_TIME changes are handled inline
 * since they only update an integer.
 */
int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
                                   unsigned long event, void *ptr)
{
        struct mlxsw_sp_neigh_event_work *neigh_work;
        struct mlxsw_sp_port *mlxsw_sp_port;
        struct mlxsw_sp *mlxsw_sp;
        unsigned long interval;
        struct neigh_parms *p;
        struct neighbour *n;

        switch (event) {
        case NETEVENT_DELAY_PROBE_TIME_UPDATE:
                p = ptr;

                /* We don't care about changes in the default table. */
                if (!p->dev || p->tbl != &arp_tbl)
                        return NOTIFY_DONE;

                /* We are in atomic context and can't take RTNL mutex,
                 * so use RCU variant to walk the device chain.
                 */
                mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
                if (!mlxsw_sp_port)
                        return NOTIFY_DONE;

                /* Track the kernel's probe interval so the activity dump
                 * work polls at a matching rate.
                 */
                mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
                interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
                mlxsw_sp->router.neighs_update.interval = interval;

                mlxsw_sp_port_dev_put(mlxsw_sp_port);
                break;
        case NETEVENT_NEIGH_UPDATE:
                n = ptr;

                /* Only IPv4 (ARP) neighbours are offloaded. */
                if (n->tbl != &arp_tbl)
                        return NOTIFY_DONE;

                mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
                if (!mlxsw_sp_port)
                        return NOTIFY_DONE;

                neigh_work = kzalloc(sizeof(*neigh_work), GFP_ATOMIC);
                if (!neigh_work) {
                        mlxsw_sp_port_dev_put(mlxsw_sp_port);
                        return NOTIFY_BAD;
                }

                INIT_WORK(&neigh_work->work, mlxsw_sp_router_neigh_event_work);
                neigh_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
                neigh_work->n = n;

                /* Take a reference to ensure the neighbour won't be
                 * destructed until we drop the reference in the work
                 * item (mlxsw_sp_router_neigh_event_work).
                 */
                neigh_clone(n);
                mlxsw_core_schedule_work(&neigh_work->work);
                mlxsw_sp_port_dev_put(mlxsw_sp_port);
                break;
        }

        return NOTIFY_DONE;
}
1020
/* Set up neighbour offload state: the neigh hash table and the two periodic
 * works (device activity dumping and unresolved-nexthop probing).
 */
static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
{
        int err;

        err = rhashtable_init(&mlxsw_sp->router.neigh_ht,
                              &mlxsw_sp_neigh_ht_params);
        if (err)
                return err;

        /* Initialize the polling interval according to the default
         * table.
         */
        mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);

        /* Create the delayed works for the activity_update */
        INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw,
                          mlxsw_sp_router_neighs_update_work);
        INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw,
                          mlxsw_sp_router_probe_unresolved_nexthops);
        /* Kick both works immediately; each one re-schedules itself. */
        mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0);
        mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0);
        return 0;
}
1044
static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
{
        /* Stop the periodic works before destroying the hash table they
         * operate on.
         */
        cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw);
        cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw);
        rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
}
1051
/* Ask the device to delete every host entry bound to this RIF in a single
 * RAUHT write.
 */
static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp,
                                    const struct mlxsw_sp_rif *r)
{
        char rauht_pl[MLXSW_REG_RAUHT_LEN];

        mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL,
                             r->rif, r->addr);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}
1061
1062 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
1063                                          struct mlxsw_sp_rif *r)
1064 {
1065         struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
1066
1067         mlxsw_sp_neigh_rif_flush(mlxsw_sp, r);
1068         list_for_each_entry_safe(neigh_entry, tmp, &r->neigh_list,
1069                                  rif_list_node)
1070                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
1071 }
1072
/* Hash table key for nexthop lookup - the kernel's fib_nh pointer. */
struct mlxsw_sp_nexthop_key {
        struct fib_nh *fib_nh;
};
1076
/* Driver representation of a single nexthop within a nexthop group. */
struct mlxsw_sp_nexthop {
        struct list_head neigh_list_node; /* member of neigh entry list */
        struct list_head rif_list_node; /* member of the RIF's nexthop list */
        struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
                                                * this belongs to
                                                */
        struct rhash_head ht_node;
        struct mlxsw_sp_nexthop_key key;
        struct mlxsw_sp_rif *r; /* egress RIF; NULL while unbound */
        u8 should_offload:1, /* set indicates this neigh is connected and
                              * should be put to KVD linear area of this group.
                              */
           offloaded:1, /* set in case the neigh is actually put into
                         * KVD linear area of this group.
                         */
           update:1; /* set indicates that MAC of this neigh should be
                      * updated in HW
                      */
        struct mlxsw_sp_neigh_entry *neigh_entry;
};
1097
/* Hash table key for nexthop group lookup - groups are shared per fib_info. */
struct mlxsw_sp_nexthop_group_key {
        struct fib_info *fi;
};
1101
1102 struct mlxsw_sp_nexthop_group {
1103         struct rhash_head ht_node;
1104         struct list_head fib_list; /* list of fib entries that use this group */
1105         struct mlxsw_sp_nexthop_group_key key;
1106         u8 adj_index_valid:1,
1107            gateway:1; /* routes using the group use a gateway */
1108         u32 adj_index;
1109         u16 ecmp_size;
1110         u16 count;
1111         struct mlxsw_sp_nexthop nexthops[0];
1112 #define nh_rif  nexthops[0].r
1113 };
1114
/* rhashtable parameters: nexthop groups keyed by fib_info pointer. */
static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
        .key_offset = offsetof(struct mlxsw_sp_nexthop_group, key),
        .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
        .key_len = sizeof(struct mlxsw_sp_nexthop_group_key),
};
1120
1121 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
1122                                          struct mlxsw_sp_nexthop_group *nh_grp)
1123 {
1124         return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_group_ht,
1125                                       &nh_grp->ht_node,
1126                                       mlxsw_sp_nexthop_group_ht_params);
1127 }
1128
1129 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
1130                                           struct mlxsw_sp_nexthop_group *nh_grp)
1131 {
1132         rhashtable_remove_fast(&mlxsw_sp->router.nexthop_group_ht,
1133                                &nh_grp->ht_node,
1134                                mlxsw_sp_nexthop_group_ht_params);
1135 }
1136
1137 static struct mlxsw_sp_nexthop_group *
1138 mlxsw_sp_nexthop_group_lookup(struct mlxsw_sp *mlxsw_sp,
1139                               struct mlxsw_sp_nexthop_group_key key)
1140 {
1141         return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_group_ht, &key,
1142                                       mlxsw_sp_nexthop_group_ht_params);
1143 }
1144
/* rhashtable parameters: nexthops keyed by the kernel's fib_nh pointer. */
static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
        .key_offset = offsetof(struct mlxsw_sp_nexthop, key),
        .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
        .key_len = sizeof(struct mlxsw_sp_nexthop_key),
};
1150
1151 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
1152                                    struct mlxsw_sp_nexthop *nh)
1153 {
1154         return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_ht,
1155                                       &nh->ht_node, mlxsw_sp_nexthop_ht_params);
1156 }
1157
1158 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
1159                                     struct mlxsw_sp_nexthop *nh)
1160 {
1161         rhashtable_remove_fast(&mlxsw_sp->router.nexthop_ht, &nh->ht_node,
1162                                mlxsw_sp_nexthop_ht_params);
1163 }
1164
1165 static struct mlxsw_sp_nexthop *
1166 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
1167                         struct mlxsw_sp_nexthop_key key)
1168 {
1169         return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_ht, &key,
1170                                       mlxsw_sp_nexthop_ht_params);
1171 }
1172
1173 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
1174                                              struct mlxsw_sp_vr *vr,
1175                                              u32 adj_index, u16 ecmp_size,
1176                                              u32 new_adj_index,
1177                                              u16 new_ecmp_size)
1178 {
1179         char raleu_pl[MLXSW_REG_RALEU_LEN];
1180
1181         mlxsw_reg_raleu_pack(raleu_pl,
1182                              (enum mlxsw_reg_ralxx_protocol) vr->proto, vr->id,
1183                              adj_index, ecmp_size, new_adj_index,
1184                              new_ecmp_size);
1185         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
1186 }
1187
/* Repoint, per virtual router, all routes using this group from the old
 * adjacency block to the freshly programmed one.
 */
static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
                                          struct mlxsw_sp_nexthop_group *nh_grp,
                                          u32 old_adj_index, u16 old_ecmp_size)
{
        struct mlxsw_sp_fib_entry *fib_entry;
        struct mlxsw_sp_vr *vr = NULL;
        int err;

        /* The check below only skips *consecutive* repeats of the same VR;
         * it is an optimization - a VR recurring later in the list would
         * merely trigger a redundant (harmless) device update.
         */
        list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
                if (vr == fib_entry->fib_node->vr)
                        continue;
                vr = fib_entry->fib_node->vr;
                err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr,
                                                        old_adj_index,
                                                        old_ecmp_size,
                                                        nh_grp->adj_index,
                                                        nh_grp->ecmp_size);
                if (err)
                        return err;
        }
        return 0;
}
1210
1211 static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
1212                                        struct mlxsw_sp_nexthop *nh)
1213 {
1214         struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
1215         char ratr_pl[MLXSW_REG_RATR_LEN];
1216
1217         mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
1218                             true, adj_index, neigh_entry->rif);
1219         mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
1220         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
1221 }
1222
/* Write the MAC/RIF of every offloadable nexthop into its slot of the
 * group's adjacency block. Slots are assigned in order, counting only
 * nexthops with should_offload set. When 'reallocate' is set (the block was
 * just moved) every entry is rewritten; otherwise only entries flagged
 * 'update'.
 */
static int
mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
                                  struct mlxsw_sp_nexthop_group *nh_grp,
                                  bool reallocate)
{
        u32 adj_index = nh_grp->adj_index; /* base */
        struct mlxsw_sp_nexthop *nh;
        int i;
        int err;

        for (i = 0; i < nh_grp->count; i++) {
                nh = &nh_grp->nexthops[i];

                /* Non-offloadable nexthops consume no slot. */
                if (!nh->should_offload) {
                        nh->offloaded = 0;
                        continue;
                }

                if (nh->update || reallocate) {
                        err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
                                                          adj_index, nh);
                        if (err)
                                return err;
                        nh->update = 0;
                        nh->offloaded = 1;
                }
                adj_index++;
        }
        return 0;
}
1253
1254 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1255                                      struct mlxsw_sp_fib_entry *fib_entry);
1256
1257 static int
1258 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
1259                                     struct mlxsw_sp_nexthop_group *nh_grp)
1260 {
1261         struct mlxsw_sp_fib_entry *fib_entry;
1262         int err;
1263
1264         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
1265                 err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1266                 if (err)
1267                         return err;
1268         }
1269         return 0;
1270 }
1271
/* Re-sync a nexthop group with the device after one of its nexthops changed:
 * recompute which nexthops are offloadable, (re)allocate the adjacency block
 * if their number changed, rewrite MACs, and repoint the routes that use the
 * group. On any failure fall back to trapping packets to the CPU.
 */
static void
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
                               struct mlxsw_sp_nexthop_group *nh_grp)
{
        struct mlxsw_sp_nexthop *nh;
        bool offload_change = false;
        u32 adj_index;
        u16 ecmp_size = 0;
        bool old_adj_index_valid;
        u32 old_adj_index;
        u16 old_ecmp_size;
        int ret;
        int i;
        int err;

        /* Gateway-less groups use no adjacency entries - just re-program
         * the fib entries.
         */
        if (!nh_grp->gateway) {
                mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
                return;
        }

        /* Count offloadable nexthops and detect offload-state changes. */
        for (i = 0; i < nh_grp->count; i++) {
                nh = &nh_grp->nexthops[i];

                if (nh->should_offload ^ nh->offloaded) {
                        offload_change = true;
                        if (nh->should_offload)
                                nh->update = 1;
                }
                if (nh->should_offload)
                        ecmp_size++;
        }
        if (!offload_change) {
                /* Nothing was added or removed, so no need to reallocate. Just
                 * update MAC on existing adjacency indexes.
                 */
                err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp,
                                                        false);
                if (err) {
                        dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
                        goto set_trap;
                }
                return;
        }
        if (!ecmp_size)
                /* No neigh of this group is connected so we just set
                 * the trap and let everything flow through kernel.
                 */
                goto set_trap;

        ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size);
        if (ret < 0) {
                /* We ran out of KVD linear space, just set the
                 * trap and let everything flow through kernel.
                 */
                dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
                goto set_trap;
        }
        /* Switch the group to the new adjacency block, remembering the old
         * one so routes can be repointed (and the block freed) below.
         */
        adj_index = ret;
        old_adj_index_valid = nh_grp->adj_index_valid;
        old_adj_index = nh_grp->adj_index;
        old_ecmp_size = nh_grp->ecmp_size;
        nh_grp->adj_index_valid = 1;
        nh_grp->adj_index = adj_index;
        nh_grp->ecmp_size = ecmp_size;
        err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp, true);
        if (err) {
                dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
                goto set_trap;
        }

        if (!old_adj_index_valid) {
                /* The trap was set for fib entries, so we have to call
                 * fib entry update to unset it and use adjacency index.
                 */
                err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
                if (err) {
                        dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
                        goto set_trap;
                }
                return;
        }

        err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
                                             old_adj_index, old_ecmp_size);
        mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
        if (err) {
                dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
                goto set_trap;
        }
        return;

set_trap:
        /* Error fallback: mark the group as not offloaded, have all its fib
         * entries trap to the CPU and release the adjacency block if one
         * was held.
         */
        old_adj_index_valid = nh_grp->adj_index_valid;
        nh_grp->adj_index_valid = 0;
        for (i = 0; i < nh_grp->count; i++) {
                nh = &nh_grp->nexthops[i];
                nh->offloaded = 0;
        }
        err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
        if (err)
                dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
        if (old_adj_index_valid)
                mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
}
1376
1377 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
1378                                             bool removing)
1379 {
1380         if (!removing && !nh->should_offload)
1381                 nh->should_offload = 1;
1382         else if (removing && nh->offloaded)
1383                 nh->should_offload = 0;
1384         nh->update = 1;
1385 }
1386
1387 static void
1388 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
1389                               struct mlxsw_sp_neigh_entry *neigh_entry,
1390                               bool removing)
1391 {
1392         struct mlxsw_sp_nexthop *nh;
1393
1394         list_for_each_entry(nh, &neigh_entry->nexthop_list,
1395                             neigh_list_node) {
1396                 __mlxsw_sp_nexthop_neigh_update(nh, removing);
1397                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1398         }
1399 }
1400
1401 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
1402                                       struct mlxsw_sp_rif *r)
1403 {
1404         if (nh->r)
1405                 return;
1406
1407         nh->r = r;
1408         list_add(&nh->rif_list_node, &r->nexthop_list);
1409 }
1410
1411 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
1412 {
1413         if (!nh->r)
1414                 return;
1415
1416         list_del(&nh->rif_list_node);
1417         nh->r = NULL;
1418 }
1419
1420 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
1421                                        struct mlxsw_sp_nexthop *nh)
1422 {
1423         struct mlxsw_sp_neigh_entry *neigh_entry;
1424         struct fib_nh *fib_nh = nh->key.fib_nh;
1425         struct neighbour *n;
1426         u8 nud_state, dead;
1427         int err;
1428
1429         if (!nh->nh_grp->gateway || nh->neigh_entry)
1430                 return 0;
1431
1432         /* Take a reference of neigh here ensuring that neigh would
1433          * not be detructed before the nexthop entry is finished.
1434          * The reference is taken either in neigh_lookup() or
1435          * in neigh_create() in case n is not found.
1436          */
1437         n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
1438         if (!n) {
1439                 n = neigh_create(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
1440                 if (IS_ERR(n))
1441                         return PTR_ERR(n);
1442                 neigh_event_send(n, NULL);
1443         }
1444         neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
1445         if (!neigh_entry) {
1446                 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
1447                 if (IS_ERR(neigh_entry)) {
1448                         err = -EINVAL;
1449                         goto err_neigh_entry_create;
1450                 }
1451         }
1452
1453         /* If that is the first nexthop connected to that neigh, add to
1454          * nexthop_neighs_list
1455          */
1456         if (list_empty(&neigh_entry->nexthop_list))
1457                 list_add_tail(&neigh_entry->nexthop_neighs_list_node,
1458                               &mlxsw_sp->router.nexthop_neighs_list);
1459
1460         nh->neigh_entry = neigh_entry;
1461         list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
1462         read_lock_bh(&n->lock);
1463         nud_state = n->nud_state;
1464         dead = n->dead;
1465         read_unlock_bh(&n->lock);
1466         __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
1467
1468         return 0;
1469
1470 err_neigh_entry_create:
1471         neigh_release(n);
1472         return err;
1473 }
1474
/* Undo mlxsw_sp_nexthop_neigh_init(): detach the nexthop from its neighbour
 * entry and drop the neighbour reference taken there.
 */
static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
                                        struct mlxsw_sp_nexthop *nh)
{
        struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
        struct neighbour *n;

        /* Idempotent: nothing to do for an unbound nexthop. */
        if (!neigh_entry)
                return;
        n = neigh_entry->key.n;

        __mlxsw_sp_nexthop_neigh_update(nh, true);
        list_del(&nh->neigh_list_node);
        nh->neigh_entry = NULL;

        /* If that is the last nexthop connected to that neigh, remove from
         * nexthop_neighs_list
         */
        if (list_empty(&neigh_entry->nexthop_list))
                list_del(&neigh_entry->nexthop_neighs_list_node);

        /* Drop entries that are no longer programmed nor referenced. */
        if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
                mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);

        neigh_release(n);
}
1500
1501 static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
1502                                  struct mlxsw_sp_nexthop_group *nh_grp,
1503                                  struct mlxsw_sp_nexthop *nh,
1504                                  struct fib_nh *fib_nh)
1505 {
1506         struct net_device *dev = fib_nh->nh_dev;
1507         struct in_device *in_dev;
1508         struct mlxsw_sp_rif *r;
1509         int err;
1510
1511         nh->nh_grp = nh_grp;
1512         nh->key.fib_nh = fib_nh;
1513         err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
1514         if (err)
1515                 return err;
1516
1517         in_dev = __in_dev_get_rtnl(dev);
1518         if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
1519             fib_nh->nh_flags & RTNH_F_LINKDOWN)
1520                 return 0;
1521
1522         r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
1523         if (!r)
1524                 return 0;
1525         mlxsw_sp_nexthop_rif_init(nh, r);
1526
1527         err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
1528         if (err)
1529                 goto err_nexthop_neigh_init;
1530
1531         return 0;
1532
1533 err_nexthop_neigh_init:
1534         mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
1535         return err;
1536 }
1537
static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
                                  struct mlxsw_sp_nexthop *nh)
{
        /* Tear down in reverse order of initialization: neighbour binding,
         * RIF binding, then the hash table entry.
         */
        mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
        mlxsw_sp_nexthop_rif_fini(nh);
        mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
}
1545
/* Handle FIB_EVENT_NH_ADD/DEL for an already tracked nexthop: bind or
 * unbind its RIF and neighbour, then re-sync the owning group.
 */
static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp,
                                   unsigned long event, struct fib_nh *fib_nh)
{
        struct mlxsw_sp_nexthop_key key;
        struct mlxsw_sp_nexthop *nh;
        struct mlxsw_sp_rif *r;

        /* Offload was abandoned (e.g. after a hard failure) - ignore. */
        if (mlxsw_sp->router.aborted)
                return;

        key.fib_nh = fib_nh;
        nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
        if (WARN_ON_ONCE(!nh))
                return;

        /* Without a RIF for the nexthop device there is nothing to bind. */
        r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fib_nh->nh_dev);
        if (!r)
                return;

        switch (event) {
        case FIB_EVENT_NH_ADD:
                mlxsw_sp_nexthop_rif_init(nh, r);
                /* NOTE(review): the return value of neigh_init is ignored
                 * here; on failure the nexthop simply stays non-offloaded.
                 */
                mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
                break;
        case FIB_EVENT_NH_DEL:
                mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
                mlxsw_sp_nexthop_rif_fini(nh);
                break;
        }

        mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
}
1578
1579 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
1580                                            struct mlxsw_sp_rif *r)
1581 {
1582         struct mlxsw_sp_nexthop *nh, *tmp;
1583
1584         list_for_each_entry_safe(nh, tmp, &r->nexthop_list, rif_list_node) {
1585                 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
1586                 mlxsw_sp_nexthop_rif_fini(nh);
1587                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1588         }
1589 }
1590
/* Build a nexthop group mirroring a kernel fib_info - one mlxsw_sp_nexthop
 * per kernel nexthop, indexed by the fib_info - and sync it to the device.
 * Returns the group or ERR_PTR().
 */
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
        struct mlxsw_sp_nexthop_group *nh_grp;
        struct mlxsw_sp_nexthop *nh;
        struct fib_nh *fib_nh;
        size_t alloc_size;
        int i;
        int err;

        alloc_size = sizeof(*nh_grp) +
                     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
        nh_grp = kzalloc(alloc_size, GFP_KERNEL);
        if (!nh_grp)
                return ERR_PTR(-ENOMEM);
        INIT_LIST_HEAD(&nh_grp->fib_list);
        /* Gateway-ness is judged from the first nexthop only - presumably
         * all nexthops of a fib_info share it; verify against fib core.
         */
        nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK;
        nh_grp->count = fi->fib_nhs;
        nh_grp->key.fi = fi;
        for (i = 0; i < nh_grp->count; i++) {
                nh = &nh_grp->nexthops[i];
                fib_nh = &fi->fib_nh[i];
                err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh);
                if (err)
                        goto err_nexthop_init;
        }
        err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
        if (err)
                goto err_nexthop_group_insert;
        mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
        return nh_grp;

err_nexthop_group_insert:
err_nexthop_init:
        /* Unwind only the fully initialized nexthops: i is past the last
         * good one on the insert path, at the failed one on the init path.
         */
        for (i--; i >= 0; i--) {
                nh = &nh_grp->nexthops[i];
                mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
        }
        kfree(nh_grp);
        return ERR_PTR(err);
}
1632
/* Tear down a nexthop group: unhash it, release each nexthop, then
 * refresh so the device state reflects the now-empty group. By then
 * no adjacency entries should remain allocated for it — warn if the
 * adjacency index is still marked valid.
 */
static void
mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	int i;

	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
	}
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	WARN_ON_ONCE(nh_grp->adj_index_valid);
	kfree(nh_grp);
}
1649
1650 static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
1651                                       struct mlxsw_sp_fib_entry *fib_entry,
1652                                       struct fib_info *fi)
1653 {
1654         struct mlxsw_sp_nexthop_group_key key;
1655         struct mlxsw_sp_nexthop_group *nh_grp;
1656
1657         key.fi = fi;
1658         nh_grp = mlxsw_sp_nexthop_group_lookup(mlxsw_sp, key);
1659         if (!nh_grp) {
1660                 nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi);
1661                 if (IS_ERR(nh_grp))
1662                         return PTR_ERR(nh_grp);
1663         }
1664         list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
1665         fib_entry->nh_group = nh_grp;
1666         return 0;
1667 }
1668
1669 static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
1670                                        struct mlxsw_sp_fib_entry *fib_entry)
1671 {
1672         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
1673
1674         list_del(&fib_entry->nexthop_group_node);
1675         if (!list_empty(&nh_grp->fib_list))
1676                 return;
1677         mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
1678 }
1679
1680 static bool
1681 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
1682 {
1683         struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
1684
1685         if (fib_entry->params.tos)
1686                 return false;
1687
1688         switch (fib_entry->type) {
1689         case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
1690                 return !!nh_group->adj_index_valid;
1691         case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
1692                 return !!nh_group->nh_rif;
1693         default:
1694                 return false;
1695         }
1696 }
1697
/* Mark @fib_entry as offloaded and bump the kernel fib_info offload
 * counter so user space sees RTNH_F_OFFLOAD. Only IPv4 is supported;
 * IPv6 entries should never reach this point.
 */
static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
{
	fib_entry->offloaded = true;

	switch (fib_entry->fib_node->vr->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		fib_info_offload_inc(fib_entry->nh_group->key.fi);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		WARN_ON_ONCE(1);
	}
}
1710
/* Counterpart of mlxsw_sp_fib_entry_offload_set(): drop the fib_info
 * offload counter and clear the entry's offloaded flag. IPv6 is
 * unsupported and warns.
 */
static void
mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
{
	switch (fib_entry->fib_node->vr->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		fib_info_offload_dec(fib_entry->nh_group->key.fi);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		WARN_ON_ONCE(1);
	}

	fib_entry->offloaded = false;
}
1724
/* Re-synchronize the entry's offload indication after a RALUE
 * operation @op that completed with status @err.
 *
 * DELETE: clear the indication if it was set.
 * WRITE: on success, toggle the indication to match whether the entry
 * is currently offloadable (its nexthop group state may have changed
 * since the last write). Failed writes leave the indication alone.
 */
static void
mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
				   enum mlxsw_reg_ralue_op op, int err)
{
	switch (op) {
	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
		if (!fib_entry->offloaded)
			return;
		return mlxsw_sp_fib_entry_offload_unset(fib_entry);
	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
		if (err)
			return;
		if (mlxsw_sp_fib_entry_should_offload(fib_entry) &&
		    !fib_entry->offloaded)
			mlxsw_sp_fib_entry_offload_set(fib_entry);
		else if (!mlxsw_sp_fib_entry_should_offload(fib_entry) &&
			 fib_entry->offloaded)
			mlxsw_sp_fib_entry_offload_unset(fib_entry);
		return;
	default:
		return;
	}
}
1748
/* Program a remote (gatewayed) IPv4 route into the device's LPM table
 * via the RALUE register. Returns 0 or the register-write error.
 */
static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_entry *fib_entry,
					 enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
	struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr;
	enum mlxsw_reg_ralue_trap_action trap_action;
	u16 trap_id = 0;
	u32 adjacency_index = 0;
	u16 ecmp_size = 0;

	/* In case the nexthop group adjacency index is valid, use it
	 * with provided ECMP size. Otherwise, setup trap and pass
	 * traffic to kernel.
	 */
	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		adjacency_index = fib_entry->nh_group->adj_index;
		ecmp_size = fib_entry->nh_group->ecmp_size;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	mlxsw_reg_ralue_pack4(ralue_pl,
			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
			      vr->id, fib_entry->fib_node->key.prefix_len,
			      *p_dip);
	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
					adjacency_index, ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
1782
/* Program a directly-connected IPv4 route: forward out of the nexthop
 * group's RIF when offloadable, otherwise trap matching packets to the
 * CPU. Returns 0 or the register-write error.
 */
static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry,
					enum mlxsw_reg_ralue_op op)
{
	struct mlxsw_sp_rif *r = fib_entry->nh_group->nh_rif;
	enum mlxsw_reg_ralue_trap_action trap_action;
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
	struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr;
	u16 trap_id = 0;
	u16 rif = 0;

	/* should_offload() guarantees r is non-NULL in the NOP branch. */
	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		rif = r->rif;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	mlxsw_reg_ralue_pack4(ralue_pl,
			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
			      vr->id, fib_entry->fib_node->key.prefix_len,
			      *p_dip);
	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, rif);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
1810
1811 static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp,
1812                                        struct mlxsw_sp_fib_entry *fib_entry,
1813                                        enum mlxsw_reg_ralue_op op)
1814 {
1815         char ralue_pl[MLXSW_REG_RALUE_LEN];
1816         u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
1817         struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr;
1818
1819         mlxsw_reg_ralue_pack4(ralue_pl,
1820                               (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
1821                               vr->id, fib_entry->fib_node->key.prefix_len,
1822                               *p_dip);
1823         mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
1824         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1825 }
1826
/* Dispatch an IPv4 RALUE operation according to the entry type.
 * The switch deliberately has no default so the compiler warns when a
 * new entry type is added; unknown types fall through to -EINVAL.
 */
static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_entry *fib_entry,
				  enum mlxsw_reg_ralue_op op)
{
	switch (fib_entry->type) {
	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
		return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
		return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
		return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op);
	}
	return -EINVAL;
}
1841
/* Perform a RALUE operation on @fib_entry and re-sync its offload
 * indication with the result. IPv6 is unsupported: it returns -EINVAL
 * early and intentionally skips the offload refresh.
 */
static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib_entry *fib_entry,
				 enum mlxsw_reg_ralue_op op)
{
	int err = -EINVAL;

	switch (fib_entry->fib_node->vr->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		err = mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		return err;
	}
	mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
	return err;
}
1858
/* Write (create or overwrite) @fib_entry in the device's LPM table. */
static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry)
{
	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
}
1865
/* Remove @fib_entry from the device's LPM table. */
static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_entry *fib_entry)
{
	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
}
1872
1873 static int
1874 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
1875                              const struct fib_entry_notifier_info *fen_info,
1876                              struct mlxsw_sp_fib_entry *fib_entry)
1877 {
1878         struct fib_info *fi = fen_info->fi;
1879
1880         if (fen_info->type == RTN_LOCAL || fen_info->type == RTN_BROADCAST) {
1881                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1882                 return 0;
1883         }
1884         if (fen_info->type != RTN_UNICAST)
1885                 return -EINVAL;
1886         if (fi->fib_nh->nh_scope != RT_SCOPE_LINK)
1887                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
1888         else
1889                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
1890         return 0;
1891 }
1892
/* Allocate a FIB entry for the notified IPv4 route, classify it,
 * attach it to its (shared) nexthop group and record the route
 * parameters used for ordering and lookup.
 *
 * Returns the entry or an ERR_PTR(); the entry is not yet linked into
 * @fib_node's entry list — the caller does that.
 */
static struct mlxsw_sp_fib_entry *
mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
			   struct mlxsw_sp_fib_node *fib_node,
			   const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	int err;

	fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL);
	if (!fib_entry) {
		err = -ENOMEM;
		goto err_fib_entry_alloc;
	}

	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
	if (err)
		goto err_fib4_entry_type_set;

	err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fen_info->fi);
	if (err)
		goto err_nexthop_group_get;

	fib_entry->params.prio = fen_info->fi->fib_priority;
	fib_entry->params.tb_id = fen_info->tb_id;
	fib_entry->params.type = fen_info->type;
	fib_entry->params.tos = fen_info->tos;

	fib_entry->fib_node = fib_node;

	return fib_entry;

err_nexthop_group_get:
err_fib4_entry_type_set:
	kfree(fib_entry);
err_fib_entry_alloc:
	return ERR_PTR(err);
}
1930
/* Release a FIB entry's nexthop group reference and free it. The
 * entry must already be unlinked from its node's entry list.
 */
static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry)
{
	mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
	kfree(fib_entry);
}
1937
1938 static struct mlxsw_sp_fib_node *
1939 mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
1940                        const struct fib_entry_notifier_info *fen_info);
1941
1942 static struct mlxsw_sp_fib_entry *
1943 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
1944                            const struct fib_entry_notifier_info *fen_info)
1945 {
1946         struct mlxsw_sp_fib_entry *fib_entry;
1947         struct mlxsw_sp_fib_node *fib_node;
1948
1949         fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
1950         if (IS_ERR(fib_node))
1951                 return NULL;
1952
1953         list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
1954                 if (fib_entry->params.tb_id == fen_info->tb_id &&
1955                     fib_entry->params.tos == fen_info->tos &&
1956                     fib_entry->params.type == fen_info->type &&
1957                     fib_entry->nh_group->key.fi == fen_info->fi) {
1958                         return fib_entry;
1959                 }
1960         }
1961
1962         return NULL;
1963 }
1964
1965 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
1966         .key_offset = offsetof(struct mlxsw_sp_fib_node, key),
1967         .head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
1968         .key_len = sizeof(struct mlxsw_sp_fib_key),
1969         .automatic_shrinking = true,
1970 };
1971
/* Insert @fib_node into the FIB's node hash table. Returns 0 or the
 * rhashtable error (e.g. -ENOMEM, -EEXIST for a duplicate key).
 */
static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
				    struct mlxsw_sp_fib_node *fib_node)
{
	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
				      mlxsw_sp_fib_ht_params);
}
1978
/* Remove @fib_node from the FIB's node hash table. */
static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
				     struct mlxsw_sp_fib_node *fib_node)
{
	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
			       mlxsw_sp_fib_ht_params);
}
1985
/* Look up a FIB node by prefix and length. The key is compared as a
 * flat byte blob by rhashtable, so memset() the whole struct first to
 * zero any padding and unused address bytes.
 */
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len)
{
	struct mlxsw_sp_fib_key key;

	memset(&key, 0, sizeof(key));
	memcpy(key.addr, addr, addr_len);
	key.prefix_len = prefix_len;
	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
}
1997
1998 static struct mlxsw_sp_fib_node *
1999 mlxsw_sp_fib_node_create(struct mlxsw_sp_vr *vr, const void *addr,
2000                          size_t addr_len, unsigned char prefix_len)
2001 {
2002         struct mlxsw_sp_fib_node *fib_node;
2003
2004         fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
2005         if (!fib_node)
2006                 return NULL;
2007
2008         INIT_LIST_HEAD(&fib_node->entry_list);
2009         list_add(&fib_node->list, &vr->fib->node_list);
2010         memcpy(fib_node->key.addr, addr, addr_len);
2011         fib_node->key.prefix_len = prefix_len;
2012         mlxsw_sp_fib_node_insert(vr->fib, fib_node);
2013         fib_node->vr = vr;
2014
2015         return fib_node;
2016 }
2017
/* Unhash and free a FIB node. It must be empty by now — a non-empty
 * entry list here indicates a refcounting bug, hence the WARN_ON.
 */
static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
{
	mlxsw_sp_fib_node_remove(fib_node->vr->fib, fib_node);
	list_del(&fib_node->list);
	WARN_ON(!list_empty(&fib_node->entry_list));
	kfree(fib_node);
}
2025
2026 static bool
2027 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
2028                                  const struct mlxsw_sp_fib_entry *fib_entry)
2029 {
2030         return list_first_entry(&fib_node->entry_list,
2031                                 struct mlxsw_sp_fib_entry, list) == fib_entry;
2032 }
2033
2034 static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node)
2035 {
2036         unsigned char prefix_len = fib_node->key.prefix_len;
2037         struct mlxsw_sp_fib *fib = fib_node->vr->fib;
2038
2039         if (fib->prefix_ref_count[prefix_len]++ == 0)
2040                 mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
2041 }
2042
2043 static void mlxsw_sp_fib_node_prefix_dec(struct mlxsw_sp_fib_node *fib_node)
2044 {
2045         unsigned char prefix_len = fib_node->key.prefix_len;
2046         struct mlxsw_sp_fib *fib = fib_node->vr->fib;
2047
2048         if (--fib->prefix_ref_count[prefix_len] == 0)
2049                 mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
2050 }
2051
/* Get (lookup or create) the FIB node for the notified route's prefix
 * within its virtual router. Takes a VR reference via
 * mlxsw_sp_vr_get(); on node-creation failure the reference is
 * dropped again. Returns the node or an ERR_PTR().
 */
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
		       const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->dst_len, fen_info->tb_id,
			     MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(vr))
		return ERR_CAST(vr);

	fib_node = mlxsw_sp_fib_node_lookup(vr->fib, &fen_info->dst,
					    sizeof(fen_info->dst),
					    fen_info->dst_len);
	if (fib_node)
		return fib_node;

	fib_node = mlxsw_sp_fib_node_create(vr, &fen_info->dst,
					    sizeof(fen_info->dst),
					    fen_info->dst_len);
	if (!fib_node) {
		err = -ENOMEM;
		goto err_fib_node_create;
	}

	return fib_node;

err_fib_node_create:
	mlxsw_sp_vr_put(mlxsw_sp, vr);
	return ERR_PTR(err);
}
2085
2086 static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp,
2087                                    struct mlxsw_sp_fib_node *fib_node)
2088 {
2089         struct mlxsw_sp_vr *vr = fib_node->vr;
2090
2091         if (!list_empty(&fib_node->entry_list))
2092                 return;
2093         mlxsw_sp_fib_node_destroy(fib_node);
2094         mlxsw_sp_vr_put(mlxsw_sp, vr);
2095 }
2096
/* Find the first entry that @params should be inserted before. The
 * node's entry list is kept sorted by descending (tb_id, tos, prio),
 * so we skip entries that rank higher and return the first one that
 * does not. Returns NULL when the new entry belongs at the tail of
 * its table's run (or the table is absent).
 */
static struct mlxsw_sp_fib_entry *
mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
			      const struct mlxsw_sp_fib_entry_params *params)
{
	struct mlxsw_sp_fib_entry *fib_entry;

	list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
		if (fib_entry->params.tb_id > params->tb_id)
			continue;
		if (fib_entry->params.tb_id != params->tb_id)
			break;
		if (fib_entry->params.tos > params->tos)
			continue;
		/* Same table and TOS: insert before the first entry with a
		 * lower-or-equal priority, or before a lower TOS.
		 */
		if (fib_entry->params.prio >= params->prio ||
		    fib_entry->params.tos < params->tos)
			return fib_entry;
	}

	return NULL;
}
2117
/* Append @new_entry after the run of entries equivalent to it
 * (same tb_id, tos and prio), starting at @fib_entry. Note that when
 * the loop completes without breaking, the cursor points at the list
 * head's container, and list_add_tail() before it still appends at
 * the correct position.
 */
static int mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib_entry *fib_entry,
					  struct mlxsw_sp_fib_entry *new_entry)
{
	struct mlxsw_sp_fib_node *fib_node;

	/* Append was requested but no equivalent entry exists. */
	if (WARN_ON(!fib_entry))
		return -EINVAL;

	fib_node = fib_entry->fib_node;
	list_for_each_entry_from(fib_entry, &fib_node->entry_list, list) {
		if (fib_entry->params.tb_id != new_entry->params.tb_id ||
		    fib_entry->params.tos != new_entry->params.tos ||
		    fib_entry->params.prio != new_entry->params.prio)
			break;
	}

	list_add_tail(&new_entry->list, &fib_entry->list);
	return 0;
}
2137
/* Insert @new_entry into @fib_node's sorted entry list.
 *
 * With @append, the entry goes after its equivalents (NLM_F_APPEND
 * semantics). With @replace, it must have an equivalent to replace
 * and is inserted right before it, so the caller can remove the old
 * one afterwards. Otherwise the entry is placed according to its
 * (tb_id, tos, prio) ordering.
 */
static int
mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib_node *fib_node,
			       struct mlxsw_sp_fib_entry *new_entry,
			       bool replace, bool append)
{
	struct mlxsw_sp_fib_entry *fib_entry;

	fib_entry = mlxsw_sp_fib4_node_entry_find(fib_node, &new_entry->params);

	if (append)
		return mlxsw_sp_fib4_node_list_append(fib_entry, new_entry);
	if (replace && WARN_ON(!fib_entry))
		return -EINVAL;

	/* Insert new entry before replaced one, so that we can later
	 * remove the second.
	 */
	if (fib_entry) {
		list_add_tail(&new_entry->list, &fib_entry->list);
	} else {
		struct mlxsw_sp_fib_entry *last;

		/* No insertion point found: place the entry at the end of
		 * its table's run, keeping the list sorted by tb_id.
		 */
		list_for_each_entry(last, &fib_node->entry_list, list) {
			if (new_entry->params.tb_id > last->params.tb_id)
				break;
			fib_entry = last;
		}

		if (fib_entry)
			list_add(&new_entry->list, &fib_entry->list);
		else
			list_add(&new_entry->list, &fib_node->entry_list);
	}

	return 0;
}
2174
/* Unlink @fib_entry from its node's entry list. */
static void
mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib_entry *fib_entry)
{
	list_del(&fib_entry->list);
}
2180
/* Program @fib_entry into the device if it became the node's first
 * (i.e. effective) entry. Writing the new entry overwrites whatever
 * the device held for this prefix, so the previously-first entry is
 * not deleted — only its offload indication is refreshed as if it had
 * been — which avoids a window with no route programmed.
 */
static int
mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp,
			     const struct mlxsw_sp_fib_node *fib_node,
			     struct mlxsw_sp_fib_entry *fib_entry)
{
	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
		return 0;

	/* To prevent packet loss, overwrite the previously offloaded
	 * entry.
	 */
	if (!list_is_singular(&fib_node->entry_list)) {
		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);

		mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
	}

	return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
}
2201
/* Remove @fib_entry from the device. Non-first entries were never
 * programmed, so nothing to do. When deleting the first of several
 * entries, promote the next one by overwriting the device state
 * instead of deleting it — again avoiding a routing gap. Only when
 * the node holds a single entry is the prefix actually deleted.
 */
static void
mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp,
			     const struct mlxsw_sp_fib_node *fib_node,
			     struct mlxsw_sp_fib_entry *fib_entry)
{
	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
		return;

	/* Promote the next entry by overwriting the deleted entry */
	if (!list_is_singular(&fib_node->entry_list)) {
		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;

		mlxsw_sp_fib_entry_update(mlxsw_sp, n);
		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
		return;
	}

	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
}
2222
/* Link @fib_entry into its node: insert it into the sorted entry
 * list, program the device if it became the effective entry, and
 * account its prefix length. On device failure the list insertion is
 * rolled back.
 */
static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_entry *fib_entry,
					 bool replace, bool append)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
	int err;

	err = mlxsw_sp_fib4_node_list_insert(fib_node, fib_entry, replace,
					     append);
	if (err)
		return err;

	err = mlxsw_sp_fib4_node_entry_add(mlxsw_sp, fib_node, fib_entry);
	if (err)
		goto err_fib4_node_entry_add;

	mlxsw_sp_fib_node_prefix_inc(fib_node);

	return 0;

err_fib4_node_entry_add:
	mlxsw_sp_fib4_node_list_remove(fib_entry);
	return err;
}
2247
/* Reverse of mlxsw_sp_fib4_node_entry_link(): drop the prefix-length
 * accounting, remove the entry from the device (or promote its
 * successor) and unlink it from the node's entry list.
 */
static void
mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;

	mlxsw_sp_fib_node_prefix_dec(fib_node);
	mlxsw_sp_fib4_node_entry_del(mlxsw_sp, fib_node, fib_entry);
	mlxsw_sp_fib4_node_list_remove(fib_entry);
}
2258
/* Finish an NLM_F_REPLACE: the new entry was inserted directly before
 * the entry it replaces (see mlxsw_sp_fib4_node_list_insert()), so
 * the replaced one is simply the next list element. Unlink and free
 * it, then drop its node reference.
 */
static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry,
					bool replace)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
	struct mlxsw_sp_fib_entry *replaced;

	if (!replace)
		return;

	/* We inserted the new entry before replaced one */
	replaced = list_next_entry(fib_entry, list);

	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
	mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
}
2276
/* Handle an IPv4 route-add notification: get (or create) the FIB
 * node for the prefix, create the entry, link it in and, for
 * NLM_F_REPLACE, retire the entry it supersedes. A no-op after a
 * routing abort. Error paths unwind in reverse order.
 */
static int
mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
			 const struct fib_entry_notifier_info *fen_info,
			 bool replace, bool append)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_fib_node *fib_node;
	int err;

	if (mlxsw_sp->router.aborted)
		return 0;

	fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
	if (IS_ERR(fib_node)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
		return PTR_ERR(fib_node);
	}

	fib_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
	if (IS_ERR(fib_entry)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
		err = PTR_ERR(fib_entry);
		goto err_fib4_entry_create;
	}

	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib_entry, replace,
					    append);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
		goto err_fib4_node_entry_link;
	}

	mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib_entry, replace);

	return 0;

err_fib4_node_entry_link:
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
err_fib4_entry_create:
	mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
	return err;
}
2319
/* Handle an IPv4 route-delete notification: look the entry up,
 * unlink it from the device and its node, free it and drop the node
 * reference. A no-op after a routing abort; a missing entry means
 * our state diverged from the kernel's, hence the WARN_ON.
 */
static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
				     struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_fib_node *fib_node;

	if (mlxsw_sp->router.aborted)
		return;

	fib_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
	if (WARN_ON(!fib_entry))
		return;
	fib_node = fib_entry->fib_node;

	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
	mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
}
2338
2339 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
2340 {
2341         char ralta_pl[MLXSW_REG_RALTA_LEN];
2342         char ralst_pl[MLXSW_REG_RALST_LEN];
2343         char raltb_pl[MLXSW_REG_RALTB_LEN];
2344         char ralue_pl[MLXSW_REG_RALUE_LEN];
2345         int err;
2346
2347         mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4,
2348                              MLXSW_SP_LPM_TREE_MIN);
2349         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
2350         if (err)
2351                 return err;
2352
2353         mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN);
2354         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
2355         if (err)
2356                 return err;
2357
2358         mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4,
2359                              MLXSW_SP_LPM_TREE_MIN);
2360         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
2361         if (err)
2362                 return err;
2363
2364         mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4,
2365                               MLXSW_REG_RALUE_OP_WRITE_WRITE, 0, 0, 0);
2366         mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
2367         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
2368 }
2369
/* Destroy every entry of @fib_node. The final entry's destruction
 * makes mlxsw_sp_fib4_node_put() free the node itself, so detect that
 * case up front (tmp pointing back at the list head) and stop before
 * the iterator would touch freed memory.
 */
static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib_entry *fib_entry, *tmp;

	list_for_each_entry_safe(fib_entry, tmp, &fib_node->entry_list, list) {
		bool do_break = &tmp->list == &fib_node->entry_list;

		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
		mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
		/* Break when entry list is empty and node was freed.
		 * Otherwise, we'll access freed memory in the next
		 * iteration.
		 */
		if (do_break)
			break;
	}
}
2389
/* Flush a FIB node according to its VR's protocol. Only IPv4 is
 * implemented; an IPv6 node here indicates a bug.
 */
static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_fib_node *fib_node)
{
	switch (fib_node->vr->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		WARN_ON_ONCE(1);
		break;
	}
}
2402
/* Flush every FIB node in every in-use virtual router. Flushing the
 * last node of a VR releases the VR and its node list, so detect the
 * last iteration (tmp pointing back at the head) and break before the
 * safe iterator dereferences freed memory.
 */
static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_fib_node *fib_node, *tmp;
	struct mlxsw_sp_vr *vr;
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router.vrs[i];

		if (!vr->used)
			continue;

		list_for_each_entry_safe(fib_node, tmp, &vr->fib->node_list,
					 list) {
			bool do_break = &tmp->list == &vr->fib->node_list;

			mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
			if (do_break)
				break;
		}
	}
}
2425
2426 static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
2427 {
2428         int err;
2429
2430         if (mlxsw_sp->router.aborted)
2431                 return;
2432         dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
2433         mlxsw_sp_router_fib_flush(mlxsw_sp);
2434         mlxsw_sp->router.aborted = true;
2435         err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
2436         if (err)
2437                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
2438 }
2439
2440 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
2441 {
2442         char ritr_pl[MLXSW_REG_RITR_LEN];
2443         int err;
2444
2445         mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
2446         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
2447         if (WARN_ON_ONCE(err))
2448                 return err;
2449
2450         mlxsw_reg_ritr_enable_set(ritr_pl, false);
2451         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
2452 }
2453
/* Synchronize router state after RIF @r goes away: disable the RIF in
 * hardware first, then flush the nexthops and neighbour entries that
 * referenced it.
 */
void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
                                   struct mlxsw_sp_rif *r)
{
        mlxsw_sp_router_rif_disable(mlxsw_sp, r->rif);
        mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, r);
        mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, r);
}
2461
2462 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
2463 {
2464         char rgcr_pl[MLXSW_REG_RGCR_LEN];
2465         u64 max_rifs;
2466         int err;
2467
2468         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
2469                 return -EIO;
2470
2471         max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
2472         mlxsw_sp->rifs = kcalloc(max_rifs, sizeof(struct mlxsw_sp_rif *),
2473                                  GFP_KERNEL);
2474         if (!mlxsw_sp->rifs)
2475                 return -ENOMEM;
2476
2477         mlxsw_reg_rgcr_pack(rgcr_pl, true);
2478         mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
2479         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
2480         if (err)
2481                 goto err_rgcr_fail;
2482
2483         return 0;
2484
2485 err_rgcr_fail:
2486         kfree(mlxsw_sp->rifs);
2487         return err;
2488 }
2489
2490 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
2491 {
2492         char rgcr_pl[MLXSW_REG_RGCR_LEN];
2493         int i;
2494
2495         mlxsw_reg_rgcr_pack(rgcr_pl, false);
2496         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
2497
2498         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
2499                 WARN_ON_ONCE(mlxsw_sp->rifs[i]);
2500
2501         kfree(mlxsw_sp->rifs);
2502 }
2503
/* Deferred-work context for one FIB notifier event. The notifier runs
 * in atomic context (see mlxsw_sp_router_fib_event()), so the event
 * payload is copied here and handled later in process context by
 * mlxsw_sp_router_fib_event_work().
 */
struct mlxsw_sp_fib_event_work {
        struct work_struct work;
        union {
                /* Which member is valid depends on 'event'. */
                struct fib_entry_notifier_info fen_info;
                struct fib_nh_notifier_info fnh_info;
        };
        struct mlxsw_sp *mlxsw_sp;
        unsigned long event;    /* FIB_EVENT_* the work was queued for */
};
2513
/* Process-context handler for a queued FIB event. Takes RTNL to
 * protect the driver's internal structures, releases the fib_info
 * references taken in mlxsw_sp_router_fib_event() and frees the work
 * item when done.
 */
static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
{
        struct mlxsw_sp_fib_event_work *fib_work =
                container_of(work, struct mlxsw_sp_fib_event_work, work);
        struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
        bool replace, append;
        int err;

        /* Protect internal structures from changes */
        rtnl_lock();
        switch (fib_work->event) {
        case FIB_EVENT_ENTRY_REPLACE: /* fall through */
        case FIB_EVENT_ENTRY_APPEND: /* fall through */
        case FIB_EVENT_ENTRY_ADD:
                replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
                append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
                err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
                                               replace, append);
                /* A failed offload means hardware and kernel tables
                 * would diverge; abort offloading entirely.
                 */
                if (err)
                        mlxsw_sp_router_fib4_abort(mlxsw_sp);
                fib_info_put(fib_work->fen_info.fi);
                break;
        case FIB_EVENT_ENTRY_DEL:
                mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
                fib_info_put(fib_work->fen_info.fi);
                break;
        case FIB_EVENT_RULE_ADD: /* fall through */
        case FIB_EVENT_RULE_DEL:
                /* NOTE(review): any rule change aborts offloading —
                 * presumably FIB rules are not supported in hardware;
                 * confirm against driver documentation.
                 */
                mlxsw_sp_router_fib4_abort(mlxsw_sp);
                break;
        case FIB_EVENT_NH_ADD: /* fall through */
        case FIB_EVENT_NH_DEL:
                mlxsw_sp_nexthop_event(mlxsw_sp, fib_work->event,
                                       fib_work->fnh_info.fib_nh);
                fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
                break;
        }
        rtnl_unlock();
        kfree(fib_work);
}
2554
/* Called with rcu_read_lock() — atomic context, so the event is copied
 * into a work item (GFP_ATOMIC) and processed later in
 * mlxsw_sp_router_fib_event_work().
 */
static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
                                     unsigned long event, void *ptr)
{
        struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
        struct mlxsw_sp_fib_event_work *fib_work;
        struct fib_notifier_info *info = ptr;

        /* Events from network namespaces other than init_net are
         * ignored.
         */
        if (!net_eq(info->net, &init_net))
                return NOTIFY_DONE;

        fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
        if (WARN_ON(!fib_work))
                return NOTIFY_BAD;

        INIT_WORK(&fib_work->work, mlxsw_sp_router_fib_event_work);
        fib_work->mlxsw_sp = mlxsw_sp;
        fib_work->event = event;

        switch (event) {
        case FIB_EVENT_ENTRY_REPLACE: /* fall through */
        case FIB_EVENT_ENTRY_APPEND: /* fall through */
        case FIB_EVENT_ENTRY_ADD: /* fall through */
        case FIB_EVENT_ENTRY_DEL:
                memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
                /* Take reference on fib_info to prevent it from being
                 * freed while work is queued. Release it afterwards.
                 */
                fib_info_hold(fib_work->fen_info.fi);
                break;
        case FIB_EVENT_NH_ADD: /* fall through */
        case FIB_EVENT_NH_DEL:
                memcpy(&fib_work->fnh_info, ptr, sizeof(fib_work->fnh_info));
                /* Pin the parent fib_info until the work handler has
                 * run; it is released there.
                 */
                fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
                break;
        }

        mlxsw_core_schedule_work(&fib_work->work);

        return NOTIFY_DONE;
}
2596
2597 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
2598 {
2599         struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
2600
2601         /* Flush pending FIB notifications and then flush the device's
2602          * table before requesting another dump. The FIB notification
2603          * block is unregistered, so no need to take RTNL.
2604          */
2605         mlxsw_core_flush_owq();
2606         mlxsw_sp_router_fib_flush(mlxsw_sp);
2607 }
2608
/* Bring up the router subsystem: hardware router, nexthop hash tables,
 * LPM, virtual routers, neighbour handling and the FIB notifier.
 * Returns 0 on success or a negative errno; on failure everything set
 * up so far is unwound in reverse order via the goto ladder.
 */
int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
        int err;

        INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list);
        err = __mlxsw_sp_router_init(mlxsw_sp);
        if (err)
                return err;

        err = rhashtable_init(&mlxsw_sp->router.nexthop_ht,
                              &mlxsw_sp_nexthop_ht_params);
        if (err)
                goto err_nexthop_ht_init;

        err = rhashtable_init(&mlxsw_sp->router.nexthop_group_ht,
                              &mlxsw_sp_nexthop_group_ht_params);
        if (err)
                goto err_nexthop_group_ht_init;

        mlxsw_sp_lpm_init(mlxsw_sp);
        err = mlxsw_sp_vrs_init(mlxsw_sp);
        if (err)
                goto err_vrs_init;

        err = mlxsw_sp_neigh_init(mlxsw_sp);
        if (err)
                goto err_neigh_init;

        /* Registering the notifier triggers a dump of the existing FIB,
         * flushed on replay via mlxsw_sp_router_fib_dump_flush().
         */
        mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
        err = register_fib_notifier(&mlxsw_sp->fib_nb,
                                    mlxsw_sp_router_fib_dump_flush);
        if (err)
                goto err_register_fib_notifier;

        return 0;

err_register_fib_notifier:
        mlxsw_sp_neigh_fini(mlxsw_sp);
err_neigh_init:
        mlxsw_sp_vrs_fini(mlxsw_sp);
err_vrs_init:
        rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
err_nexthop_group_ht_init:
        rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
err_nexthop_ht_init:
        __mlxsw_sp_router_fini(mlxsw_sp);
        return err;
}
2657
/* Tear down the router subsystem in exact reverse order of
 * mlxsw_sp_router_init().
 */
void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
        unregister_fib_notifier(&mlxsw_sp->fib_nb);
        mlxsw_sp_neigh_fini(mlxsw_sp);
        mlxsw_sp_vrs_fini(mlxsw_sp);
        rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
        rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
        __mlxsw_sp_router_fini(mlxsw_sp);
}