]> git.karo-electronics.de Git - karo-tx-linux.git/blob - net/ipv4/fib_frontend.c
63b11ca54d954e00c99c6b23cdcdb0bda5475951
[karo-tx-linux.git] / net / ipv4 / fib_frontend.c
1 /*
2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
3  *              operating system.  INET is implemented using the  BSD Socket
4  *              interface as the means of communication with the user level.
5  *
6  *              IPv4 Forwarding Information Base: FIB frontend.
7  *
8  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9  *
10  *              This program is free software; you can redistribute it and/or
11  *              modify it under the terms of the GNU General Public License
12  *              as published by the Free Software Foundation; either version
13  *              2 of the License, or (at your option) any later version.
14  */
15
16 #include <linux/module.h>
17 #include <asm/uaccess.h>
18 #include <linux/bitops.h>
19 #include <linux/capability.h>
20 #include <linux/types.h>
21 #include <linux/kernel.h>
22 #include <linux/mm.h>
23 #include <linux/string.h>
24 #include <linux/socket.h>
25 #include <linux/sockios.h>
26 #include <linux/errno.h>
27 #include <linux/in.h>
28 #include <linux/inet.h>
29 #include <linux/inetdevice.h>
30 #include <linux/netdevice.h>
31 #include <linux/if_addr.h>
32 #include <linux/if_arp.h>
33 #include <linux/skbuff.h>
34 #include <linux/init.h>
35 #include <linux/list.h>
36 #include <linux/slab.h>
37
38 #include <net/ip.h>
39 #include <net/protocol.h>
40 #include <net/route.h>
41 #include <net/tcp.h>
42 #include <net/sock.h>
43 #include <net/arp.h>
44 #include <net/ip_fib.h>
45 #include <net/rtnetlink.h>
46 #include <net/xfrm.h>
47
48 #ifndef CONFIG_IP_MULTIPLE_TABLES
49
50 static int __net_init fib4_rules_init(struct net *net)
51 {
52         struct fib_table *local_table, *main_table;
53
54         local_table = fib_trie_table(RT_TABLE_LOCAL);
55         if (local_table == NULL)
56                 return -ENOMEM;
57
58         main_table  = fib_trie_table(RT_TABLE_MAIN);
59         if (main_table == NULL)
60                 goto fail;
61
62         hlist_add_head_rcu(&local_table->tb_hlist,
63                                 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
64         hlist_add_head_rcu(&main_table->tb_hlist,
65                                 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
66         return 0;
67
68 fail:
69         kfree(local_table);
70         return -ENOMEM;
71 }
72 #else
73
74 struct fib_table *fib_new_table(struct net *net, u32 id)
75 {
76         struct fib_table *tb;
77         unsigned int h;
78
79         if (id == 0)
80                 id = RT_TABLE_MAIN;
81         tb = fib_get_table(net, id);
82         if (tb)
83                 return tb;
84
85         tb = fib_trie_table(id);
86         if (!tb)
87                 return NULL;
88         h = id & (FIB_TABLE_HASHSZ - 1);
89         hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
90         return tb;
91 }
92
93 struct fib_table *fib_get_table(struct net *net, u32 id)
94 {
95         struct fib_table *tb;
96         struct hlist_node *node;
97         struct hlist_head *head;
98         unsigned int h;
99
100         if (id == 0)
101                 id = RT_TABLE_MAIN;
102         h = id & (FIB_TABLE_HASHSZ - 1);
103
104         rcu_read_lock();
105         head = &net->ipv4.fib_table_hash[h];
106         hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
107                 if (tb->tb_id == id) {
108                         rcu_read_unlock();
109                         return tb;
110                 }
111         }
112         rcu_read_unlock();
113         return NULL;
114 }
115 #endif /* CONFIG_IP_MULTIPLE_TABLES */
116
117 static void fib_flush(struct net *net)
118 {
119         int flushed = 0;
120         struct fib_table *tb;
121         struct hlist_node *node;
122         struct hlist_head *head;
123         unsigned int h;
124
125         for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
126                 head = &net->ipv4.fib_table_hash[h];
127                 hlist_for_each_entry(tb, node, head, tb_hlist)
128                         flushed += fib_table_flush(tb);
129         }
130
131         if (flushed)
132                 rt_cache_flush(net, -1);
133 }
134
135 /*
136  * Find address type as if only "dev" was present in the system. If
137  * on_dev is NULL then all interfaces are taken into consideration.
138  */
139 static inline unsigned int __inet_dev_addr_type(struct net *net,
140                                                 const struct net_device *dev,
141                                                 __be32 addr)
142 {
143         struct flowi4           fl4 = { .daddr = addr };
144         struct fib_result       res;
145         unsigned int ret = RTN_BROADCAST;
146         struct fib_table *local_table;
147
148         if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
149                 return RTN_BROADCAST;
150         if (ipv4_is_multicast(addr))
151                 return RTN_MULTICAST;
152
153 #ifdef CONFIG_IP_MULTIPLE_TABLES
154         res.r = NULL;
155 #endif
156
157         local_table = fib_get_table(net, RT_TABLE_LOCAL);
158         if (local_table) {
159                 ret = RTN_UNICAST;
160                 rcu_read_lock();
161                 if (!fib_table_lookup(local_table, &fl4, &res, FIB_LOOKUP_NOREF)) {
162                         if (!dev || dev == res.fi->fib_dev)
163                                 ret = res.type;
164                 }
165                 rcu_read_unlock();
166         }
167         return ret;
168 }
169
170 unsigned int inet_addr_type(struct net *net, __be32 addr)
171 {
172         return __inet_dev_addr_type(net, NULL, addr);
173 }
174 EXPORT_SYMBOL(inet_addr_type);
175
176 unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
177                                 __be32 addr)
178 {
179         return __inet_dev_addr_type(net, dev, addr);
180 }
181 EXPORT_SYMBOL(inet_dev_addr_type);
182
183 __be32 fib_compute_spec_dst(struct sk_buff *skb)
184 {
185         struct net_device *dev = skb->dev;
186         struct in_device *in_dev;
187         struct fib_result res;
188         struct flowi4 fl4;
189         struct net *net;
190
191         if (skb->pkt_type != PACKET_BROADCAST &&
192             skb->pkt_type != PACKET_MULTICAST)
193                 return ip_hdr(skb)->daddr;
194
195         in_dev = __in_dev_get_rcu(dev);
196         BUG_ON(!in_dev);
197         fl4.flowi4_oif = 0;
198         fl4.flowi4_iif = 0;
199         fl4.daddr = ip_hdr(skb)->saddr;
200         fl4.saddr = ip_hdr(skb)->daddr;
201         fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
202         fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
203         fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0;
204
205         net = dev_net(dev);
206         if (!fib_lookup(net, &fl4, &res))
207                 return FIB_RES_PREFSRC(net, res);
208         else
209                 return inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
210 }
211
212 /* Given (packet source, input interface) and optional (dst, oif, tos):
213  * - (main) check, that source is valid i.e. not broadcast or our local
214  *   address.
215  * - figure out what "logical" interface this packet arrived
216  *   and calculate "specific destination" address.
217  * - check, that packet arrived from expected physical interface.
218  * called with rcu_read_lock()
219  */
220 int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos,
221                         int oif, struct net_device *dev, u32 *itag)
222 {
223         struct in_device *in_dev;
224         struct flowi4 fl4;
225         struct fib_result res;
226         int no_addr, rpf, accept_local;
227         bool dev_match;
228         int ret;
229         struct net *net;
230
231         fl4.flowi4_oif = 0;
232         fl4.flowi4_iif = oif;
233         fl4.daddr = src;
234         fl4.saddr = dst;
235         fl4.flowi4_tos = tos;
236         fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
237
238         no_addr = rpf = accept_local = 0;
239         in_dev = __in_dev_get_rcu(dev);
240         if (in_dev) {
241                 no_addr = in_dev->ifa_list == NULL;
242
243                 /* Ignore rp_filter for packets protected by IPsec. */
244                 rpf = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(in_dev);
245
246                 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
247                 fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0;
248         }
249
250         if (in_dev == NULL)
251                 goto e_inval;
252
253         net = dev_net(dev);
254         if (fib_lookup(net, &fl4, &res))
255                 goto last_resort;
256         if (res.type != RTN_UNICAST) {
257                 if (res.type != RTN_LOCAL || !accept_local)
258                         goto e_inval;
259         }
260         fib_combine_itag(itag, &res);
261         dev_match = false;
262
263 #ifdef CONFIG_IP_ROUTE_MULTIPATH
264         for (ret = 0; ret < res.fi->fib_nhs; ret++) {
265                 struct fib_nh *nh = &res.fi->fib_nh[ret];
266
267                 if (nh->nh_dev == dev) {
268                         dev_match = true;
269                         break;
270                 }
271         }
272 #else
273         if (FIB_RES_DEV(res) == dev)
274                 dev_match = true;
275 #endif
276         if (dev_match) {
277                 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
278                 return ret;
279         }
280         if (no_addr)
281                 goto last_resort;
282         if (rpf == 1)
283                 goto e_rpf;
284         fl4.flowi4_oif = dev->ifindex;
285
286         ret = 0;
287         if (fib_lookup(net, &fl4, &res) == 0) {
288                 if (res.type == RTN_UNICAST)
289                         ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
290         }
291         return ret;
292
293 last_resort:
294         if (rpf)
295                 goto e_rpf;
296         *itag = 0;
297         return 0;
298
299 e_inval:
300         return -EINVAL;
301 e_rpf:
302         return -EXDEV;
303 }
304
305 static inline __be32 sk_extract_addr(struct sockaddr *addr)
306 {
307         return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
308 }
309
310 static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
311 {
312         struct nlattr *nla;
313
314         nla = (struct nlattr *) ((char *) mx + len);
315         nla->nla_type = type;
316         nla->nla_len = nla_attr_size(4);
317         *(u32 *) nla_data(nla) = value;
318
319         return len + nla_total_size(4);
320 }
321
322 static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
323                                  struct fib_config *cfg)
324 {
325         __be32 addr;
326         int plen;
327
328         memset(cfg, 0, sizeof(*cfg));
329         cfg->fc_nlinfo.nl_net = net;
330
331         if (rt->rt_dst.sa_family != AF_INET)
332                 return -EAFNOSUPPORT;
333
334         /*
335          * Check mask for validity:
336          * a) it must be contiguous.
337          * b) destination must have all host bits clear.
338          * c) if application forgot to set correct family (AF_INET),
339          *    reject request unless it is absolutely clear i.e.
340          *    both family and mask are zero.
341          */
342         plen = 32;
343         addr = sk_extract_addr(&rt->rt_dst);
344         if (!(rt->rt_flags & RTF_HOST)) {
345                 __be32 mask = sk_extract_addr(&rt->rt_genmask);
346
347                 if (rt->rt_genmask.sa_family != AF_INET) {
348                         if (mask || rt->rt_genmask.sa_family)
349                                 return -EAFNOSUPPORT;
350                 }
351
352                 if (bad_mask(mask, addr))
353                         return -EINVAL;
354
355                 plen = inet_mask_len(mask);
356         }
357
358         cfg->fc_dst_len = plen;
359         cfg->fc_dst = addr;
360
361         if (cmd != SIOCDELRT) {
362                 cfg->fc_nlflags = NLM_F_CREATE;
363                 cfg->fc_protocol = RTPROT_BOOT;
364         }
365
366         if (rt->rt_metric)
367                 cfg->fc_priority = rt->rt_metric - 1;
368
369         if (rt->rt_flags & RTF_REJECT) {
370                 cfg->fc_scope = RT_SCOPE_HOST;
371                 cfg->fc_type = RTN_UNREACHABLE;
372                 return 0;
373         }
374
375         cfg->fc_scope = RT_SCOPE_NOWHERE;
376         cfg->fc_type = RTN_UNICAST;
377
378         if (rt->rt_dev) {
379                 char *colon;
380                 struct net_device *dev;
381                 char devname[IFNAMSIZ];
382
383                 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
384                         return -EFAULT;
385
386                 devname[IFNAMSIZ-1] = 0;
387                 colon = strchr(devname, ':');
388                 if (colon)
389                         *colon = 0;
390                 dev = __dev_get_by_name(net, devname);
391                 if (!dev)
392                         return -ENODEV;
393                 cfg->fc_oif = dev->ifindex;
394                 if (colon) {
395                         struct in_ifaddr *ifa;
396                         struct in_device *in_dev = __in_dev_get_rtnl(dev);
397                         if (!in_dev)
398                                 return -ENODEV;
399                         *colon = ':';
400                         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
401                                 if (strcmp(ifa->ifa_label, devname) == 0)
402                                         break;
403                         if (ifa == NULL)
404                                 return -ENODEV;
405                         cfg->fc_prefsrc = ifa->ifa_local;
406                 }
407         }
408
409         addr = sk_extract_addr(&rt->rt_gateway);
410         if (rt->rt_gateway.sa_family == AF_INET && addr) {
411                 cfg->fc_gw = addr;
412                 if (rt->rt_flags & RTF_GATEWAY &&
413                     inet_addr_type(net, addr) == RTN_UNICAST)
414                         cfg->fc_scope = RT_SCOPE_UNIVERSE;
415         }
416
417         if (cmd == SIOCDELRT)
418                 return 0;
419
420         if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
421                 return -EINVAL;
422
423         if (cfg->fc_scope == RT_SCOPE_NOWHERE)
424                 cfg->fc_scope = RT_SCOPE_LINK;
425
426         if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
427                 struct nlattr *mx;
428                 int len = 0;
429
430                 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
431                 if (mx == NULL)
432                         return -ENOMEM;
433
434                 if (rt->rt_flags & RTF_MTU)
435                         len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
436
437                 if (rt->rt_flags & RTF_WINDOW)
438                         len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
439
440                 if (rt->rt_flags & RTF_IRTT)
441                         len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
442
443                 cfg->fc_mx = mx;
444                 cfg->fc_mx_len = len;
445         }
446
447         return 0;
448 }
449
450 /*
451  * Handle IP routing ioctl calls.
452  * These are used to manipulate the routing tables
453  */
454 int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
455 {
456         struct fib_config cfg;
457         struct rtentry rt;
458         int err;
459
460         switch (cmd) {
461         case SIOCADDRT:         /* Add a route */
462         case SIOCDELRT:         /* Delete a route */
463                 if (!capable(CAP_NET_ADMIN))
464                         return -EPERM;
465
466                 if (copy_from_user(&rt, arg, sizeof(rt)))
467                         return -EFAULT;
468
469                 rtnl_lock();
470                 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
471                 if (err == 0) {
472                         struct fib_table *tb;
473
474                         if (cmd == SIOCDELRT) {
475                                 tb = fib_get_table(net, cfg.fc_table);
476                                 if (tb)
477                                         err = fib_table_delete(tb, &cfg);
478                                 else
479                                         err = -ESRCH;
480                         } else {
481                                 tb = fib_new_table(net, cfg.fc_table);
482                                 if (tb)
483                                         err = fib_table_insert(tb, &cfg);
484                                 else
485                                         err = -ENOBUFS;
486                         }
487
488                         /* allocated by rtentry_to_fib_config() */
489                         kfree(cfg.fc_mx);
490                 }
491                 rtnl_unlock();
492                 return err;
493         }
494         return -EINVAL;
495 }
496
497 const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
498         [RTA_DST]               = { .type = NLA_U32 },
499         [RTA_SRC]               = { .type = NLA_U32 },
500         [RTA_IIF]               = { .type = NLA_U32 },
501         [RTA_OIF]               = { .type = NLA_U32 },
502         [RTA_GATEWAY]           = { .type = NLA_U32 },
503         [RTA_PRIORITY]          = { .type = NLA_U32 },
504         [RTA_PREFSRC]           = { .type = NLA_U32 },
505         [RTA_METRICS]           = { .type = NLA_NESTED },
506         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
507         [RTA_FLOW]              = { .type = NLA_U32 },
508 };
509
510 static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
511                              struct nlmsghdr *nlh, struct fib_config *cfg)
512 {
513         struct nlattr *attr;
514         int err, remaining;
515         struct rtmsg *rtm;
516
517         err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
518         if (err < 0)
519                 goto errout;
520
521         memset(cfg, 0, sizeof(*cfg));
522
523         rtm = nlmsg_data(nlh);
524         cfg->fc_dst_len = rtm->rtm_dst_len;
525         cfg->fc_tos = rtm->rtm_tos;
526         cfg->fc_table = rtm->rtm_table;
527         cfg->fc_protocol = rtm->rtm_protocol;
528         cfg->fc_scope = rtm->rtm_scope;
529         cfg->fc_type = rtm->rtm_type;
530         cfg->fc_flags = rtm->rtm_flags;
531         cfg->fc_nlflags = nlh->nlmsg_flags;
532
533         cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
534         cfg->fc_nlinfo.nlh = nlh;
535         cfg->fc_nlinfo.nl_net = net;
536
537         if (cfg->fc_type > RTN_MAX) {
538                 err = -EINVAL;
539                 goto errout;
540         }
541
542         nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
543                 switch (nla_type(attr)) {
544                 case RTA_DST:
545                         cfg->fc_dst = nla_get_be32(attr);
546                         break;
547                 case RTA_OIF:
548                         cfg->fc_oif = nla_get_u32(attr);
549                         break;
550                 case RTA_GATEWAY:
551                         cfg->fc_gw = nla_get_be32(attr);
552                         break;
553                 case RTA_PRIORITY:
554                         cfg->fc_priority = nla_get_u32(attr);
555                         break;
556                 case RTA_PREFSRC:
557                         cfg->fc_prefsrc = nla_get_be32(attr);
558                         break;
559                 case RTA_METRICS:
560                         cfg->fc_mx = nla_data(attr);
561                         cfg->fc_mx_len = nla_len(attr);
562                         break;
563                 case RTA_MULTIPATH:
564                         cfg->fc_mp = nla_data(attr);
565                         cfg->fc_mp_len = nla_len(attr);
566                         break;
567                 case RTA_FLOW:
568                         cfg->fc_flow = nla_get_u32(attr);
569                         break;
570                 case RTA_TABLE:
571                         cfg->fc_table = nla_get_u32(attr);
572                         break;
573                 }
574         }
575
576         return 0;
577 errout:
578         return err;
579 }
580
581 static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
582 {
583         struct net *net = sock_net(skb->sk);
584         struct fib_config cfg;
585         struct fib_table *tb;
586         int err;
587
588         err = rtm_to_fib_config(net, skb, nlh, &cfg);
589         if (err < 0)
590                 goto errout;
591
592         tb = fib_get_table(net, cfg.fc_table);
593         if (tb == NULL) {
594                 err = -ESRCH;
595                 goto errout;
596         }
597
598         err = fib_table_delete(tb, &cfg);
599 errout:
600         return err;
601 }
602
603 static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
604 {
605         struct net *net = sock_net(skb->sk);
606         struct fib_config cfg;
607         struct fib_table *tb;
608         int err;
609
610         err = rtm_to_fib_config(net, skb, nlh, &cfg);
611         if (err < 0)
612                 goto errout;
613
614         tb = fib_new_table(net, cfg.fc_table);
615         if (tb == NULL) {
616                 err = -ENOBUFS;
617                 goto errout;
618         }
619
620         err = fib_table_insert(tb, &cfg);
621 errout:
622         return err;
623 }
624
625 static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
626 {
627         struct net *net = sock_net(skb->sk);
628         unsigned int h, s_h;
629         unsigned int e = 0, s_e;
630         struct fib_table *tb;
631         struct hlist_node *node;
632         struct hlist_head *head;
633         int dumped = 0;
634
635         if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
636             ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
637                 return ip_rt_dump(skb, cb);
638
639         s_h = cb->args[0];
640         s_e = cb->args[1];
641
642         for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
643                 e = 0;
644                 head = &net->ipv4.fib_table_hash[h];
645                 hlist_for_each_entry(tb, node, head, tb_hlist) {
646                         if (e < s_e)
647                                 goto next;
648                         if (dumped)
649                                 memset(&cb->args[2], 0, sizeof(cb->args) -
650                                                  2 * sizeof(cb->args[0]));
651                         if (fib_table_dump(tb, skb, cb) < 0)
652                                 goto out;
653                         dumped = 1;
654 next:
655                         e++;
656                 }
657         }
658 out:
659         cb->args[1] = e;
660         cb->args[0] = h;
661
662         return skb->len;
663 }
664
665 /* Prepare and feed intra-kernel routing request.
666  * Really, it should be netlink message, but :-( netlink
667  * can be not configured, so that we feed it directly
668  * to fib engine. It is legal, because all events occur
669  * only when netlink is already locked.
670  */
671 static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
672 {
673         struct net *net = dev_net(ifa->ifa_dev->dev);
674         struct fib_table *tb;
675         struct fib_config cfg = {
676                 .fc_protocol = RTPROT_KERNEL,
677                 .fc_type = type,
678                 .fc_dst = dst,
679                 .fc_dst_len = dst_len,
680                 .fc_prefsrc = ifa->ifa_local,
681                 .fc_oif = ifa->ifa_dev->dev->ifindex,
682                 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
683                 .fc_nlinfo = {
684                         .nl_net = net,
685                 },
686         };
687
688         if (type == RTN_UNICAST)
689                 tb = fib_new_table(net, RT_TABLE_MAIN);
690         else
691                 tb = fib_new_table(net, RT_TABLE_LOCAL);
692
693         if (tb == NULL)
694                 return;
695
696         cfg.fc_table = tb->tb_id;
697
698         if (type != RTN_LOCAL)
699                 cfg.fc_scope = RT_SCOPE_LINK;
700         else
701                 cfg.fc_scope = RT_SCOPE_HOST;
702
703         if (cmd == RTM_NEWROUTE)
704                 fib_table_insert(tb, &cfg);
705         else
706                 fib_table_delete(tb, &cfg);
707 }
708
709 void fib_add_ifaddr(struct in_ifaddr *ifa)
710 {
711         struct in_device *in_dev = ifa->ifa_dev;
712         struct net_device *dev = in_dev->dev;
713         struct in_ifaddr *prim = ifa;
714         __be32 mask = ifa->ifa_mask;
715         __be32 addr = ifa->ifa_local;
716         __be32 prefix = ifa->ifa_address & mask;
717
718         if (ifa->ifa_flags & IFA_F_SECONDARY) {
719                 prim = inet_ifa_byprefix(in_dev, prefix, mask);
720                 if (prim == NULL) {
721                         pr_warn("%s: bug: prim == NULL\n", __func__);
722                         return;
723                 }
724         }
725
726         fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
727
728         if (!(dev->flags & IFF_UP))
729                 return;
730
731         /* Add broadcast address, if it is explicitly assigned. */
732         if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
733                 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
734
735         if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
736             (prefix != addr || ifa->ifa_prefixlen < 32)) {
737                 fib_magic(RTM_NEWROUTE,
738                           dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
739                           prefix, ifa->ifa_prefixlen, prim);
740
741                 /* Add network specific broadcasts, when it takes a sense */
742                 if (ifa->ifa_prefixlen < 31) {
743                         fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
744                         fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask,
745                                   32, prim);
746                 }
747         }
748 }
749
750 /* Delete primary or secondary address.
751  * Optionally, on secondary address promotion consider the addresses
752  * from subnet iprim as deleted, even if they are in device list.
753  * In this case the secondary ifa can be in device list.
754  */
755 void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
756 {
757         struct in_device *in_dev = ifa->ifa_dev;
758         struct net_device *dev = in_dev->dev;
759         struct in_ifaddr *ifa1;
760         struct in_ifaddr *prim = ifa, *prim1 = NULL;
761         __be32 brd = ifa->ifa_address | ~ifa->ifa_mask;
762         __be32 any = ifa->ifa_address & ifa->ifa_mask;
763 #define LOCAL_OK        1
764 #define BRD_OK          2
765 #define BRD0_OK         4
766 #define BRD1_OK         8
767         unsigned int ok = 0;
768         int subnet = 0;         /* Primary network */
769         int gone = 1;           /* Address is missing */
770         int same_prefsrc = 0;   /* Another primary with same IP */
771
772         if (ifa->ifa_flags & IFA_F_SECONDARY) {
773                 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
774                 if (prim == NULL) {
775                         pr_warn("%s: bug: prim == NULL\n", __func__);
776                         return;
777                 }
778                 if (iprim && iprim != prim) {
779                         pr_warn("%s: bug: iprim != prim\n", __func__);
780                         return;
781                 }
782         } else if (!ipv4_is_zeronet(any) &&
783                    (any != ifa->ifa_local || ifa->ifa_prefixlen < 32)) {
784                 fib_magic(RTM_DELROUTE,
785                           dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
786                           any, ifa->ifa_prefixlen, prim);
787                 subnet = 1;
788         }
789
790         /* Deletion is more complicated than add.
791          * We should take care of not to delete too much :-)
792          *
793          * Scan address list to be sure that addresses are really gone.
794          */
795
796         for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
797                 if (ifa1 == ifa) {
798                         /* promotion, keep the IP */
799                         gone = 0;
800                         continue;
801                 }
802                 /* Ignore IFAs from our subnet */
803                 if (iprim && ifa1->ifa_mask == iprim->ifa_mask &&
804                     inet_ifa_match(ifa1->ifa_address, iprim))
805                         continue;
806
807                 /* Ignore ifa1 if it uses different primary IP (prefsrc) */
808                 if (ifa1->ifa_flags & IFA_F_SECONDARY) {
809                         /* Another address from our subnet? */
810                         if (ifa1->ifa_mask == prim->ifa_mask &&
811                             inet_ifa_match(ifa1->ifa_address, prim))
812                                 prim1 = prim;
813                         else {
814                                 /* We reached the secondaries, so
815                                  * same_prefsrc should be determined.
816                                  */
817                                 if (!same_prefsrc)
818                                         continue;
819                                 /* Search new prim1 if ifa1 is not
820                                  * using the current prim1
821                                  */
822                                 if (!prim1 ||
823                                     ifa1->ifa_mask != prim1->ifa_mask ||
824                                     !inet_ifa_match(ifa1->ifa_address, prim1))
825                                         prim1 = inet_ifa_byprefix(in_dev,
826                                                         ifa1->ifa_address,
827                                                         ifa1->ifa_mask);
828                                 if (!prim1)
829                                         continue;
830                                 if (prim1->ifa_local != prim->ifa_local)
831                                         continue;
832                         }
833                 } else {
834                         if (prim->ifa_local != ifa1->ifa_local)
835                                 continue;
836                         prim1 = ifa1;
837                         if (prim != prim1)
838                                 same_prefsrc = 1;
839                 }
840                 if (ifa->ifa_local == ifa1->ifa_local)
841                         ok |= LOCAL_OK;
842                 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
843                         ok |= BRD_OK;
844                 if (brd == ifa1->ifa_broadcast)
845                         ok |= BRD1_OK;
846                 if (any == ifa1->ifa_broadcast)
847                         ok |= BRD0_OK;
848                 /* primary has network specific broadcasts */
849                 if (prim1 == ifa1 && ifa1->ifa_prefixlen < 31) {
850                         __be32 brd1 = ifa1->ifa_address | ~ifa1->ifa_mask;
851                         __be32 any1 = ifa1->ifa_address & ifa1->ifa_mask;
852
853                         if (!ipv4_is_zeronet(any1)) {
854                                 if (ifa->ifa_broadcast == brd1 ||
855                                     ifa->ifa_broadcast == any1)
856                                         ok |= BRD_OK;
857                                 if (brd == brd1 || brd == any1)
858                                         ok |= BRD1_OK;
859                                 if (any == brd1 || any == any1)
860                                         ok |= BRD0_OK;
861                         }
862                 }
863         }
864
865         if (!(ok & BRD_OK))
866                 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
867         if (subnet && ifa->ifa_prefixlen < 31) {
868                 if (!(ok & BRD1_OK))
869                         fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
870                 if (!(ok & BRD0_OK))
871                         fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
872         }
873         if (!(ok & LOCAL_OK)) {
874                 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
875
876                 /* Check, that this local address finally disappeared. */
877                 if (gone &&
878                     inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
879                         /* And the last, but not the least thing.
880                          * We must flush stray FIB entries.
881                          *
882                          * First of all, we scan fib_info list searching
883                          * for stray nexthop entries, then ignite fib_flush.
884                          */
885                         if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
886                                 fib_flush(dev_net(dev));
887                 }
888         }
889 #undef LOCAL_OK
890 #undef BRD_OK
891 #undef BRD0_OK
892 #undef BRD1_OK
893 }
894
895 static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
896 {
897
898         struct fib_result       res;
899         struct flowi4           fl4 = {
900                 .flowi4_mark = frn->fl_mark,
901                 .daddr = frn->fl_addr,
902                 .flowi4_tos = frn->fl_tos,
903                 .flowi4_scope = frn->fl_scope,
904         };
905
906 #ifdef CONFIG_IP_MULTIPLE_TABLES
907         res.r = NULL;
908 #endif
909
910         frn->err = -ENOENT;
911         if (tb) {
912                 local_bh_disable();
913
914                 frn->tb_id = tb->tb_id;
915                 rcu_read_lock();
916                 frn->err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
917
918                 if (!frn->err) {
919                         frn->prefixlen = res.prefixlen;
920                         frn->nh_sel = res.nh_sel;
921                         frn->type = res.type;
922                         frn->scope = res.scope;
923                 }
924                 rcu_read_unlock();
925                 local_bh_enable();
926         }
927 }
928
929 static void nl_fib_input(struct sk_buff *skb)
930 {
931         struct net *net;
932         struct fib_result_nl *frn;
933         struct nlmsghdr *nlh;
934         struct fib_table *tb;
935         u32 pid;
936
937         net = sock_net(skb->sk);
938         nlh = nlmsg_hdr(skb);
939         if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
940             nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
941                 return;
942
943         skb = skb_clone(skb, GFP_KERNEL);
944         if (skb == NULL)
945                 return;
946         nlh = nlmsg_hdr(skb);
947
948         frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
949         tb = fib_get_table(net, frn->tb_id_in);
950
951         nl_fib_lookup(frn, tb);
952
953         pid = NETLINK_CB(skb).pid;      /* pid of sending process */
954         NETLINK_CB(skb).pid = 0;        /* from kernel */
955         NETLINK_CB(skb).dst_group = 0;  /* unicast */
956         netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
957 }
958
959 static int __net_init nl_fib_lookup_init(struct net *net)
960 {
961         struct sock *sk;
962         sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
963                                    nl_fib_input, NULL, THIS_MODULE);
964         if (sk == NULL)
965                 return -EAFNOSUPPORT;
966         net->ipv4.fibnl = sk;
967         return 0;
968 }
969
970 static void nl_fib_lookup_exit(struct net *net)
971 {
972         netlink_kernel_release(net->ipv4.fibnl);
973         net->ipv4.fibnl = NULL;
974 }
975
/*
 * Tear down IPv4 routing state that refers to @dev.
 *
 * @force is handed to fib_sync_down_dev(); callers in this file pass 0 on
 * NETDEV_DOWN, 1 when the last address was removed and 2 on
 * NETDEV_UNREGISTER. @delay is handed to rt_cache_flush().
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	struct net *net = dev_net(dev);

	/* Flush the tables only when the sync reports something to flush. */
	if (fib_sync_down_dev(dev, force))
		fib_flush(net);

	rt_cache_flush(net, delay);
	arp_ifdown(dev);
}
983
984 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
985 {
986         struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
987         struct net_device *dev = ifa->ifa_dev->dev;
988         struct net *net = dev_net(dev);
989
990         switch (event) {
991         case NETDEV_UP:
992                 fib_add_ifaddr(ifa);
993 #ifdef CONFIG_IP_ROUTE_MULTIPATH
994                 fib_sync_up(dev);
995 #endif
996                 atomic_inc(&net->ipv4.dev_addr_genid);
997                 rt_cache_flush(dev_net(dev), -1);
998                 break;
999         case NETDEV_DOWN:
1000                 fib_del_ifaddr(ifa, NULL);
1001                 atomic_inc(&net->ipv4.dev_addr_genid);
1002                 if (ifa->ifa_dev->ifa_list == NULL) {
1003                         /* Last address was deleted from this interface.
1004                          * Disable IP.
1005                          */
1006                         fib_disable_ip(dev, 1, 0);
1007                 } else {
1008                         rt_cache_flush(dev_net(dev), -1);
1009                 }
1010                 break;
1011         }
1012         return NOTIFY_DONE;
1013 }
1014
1015 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
1016 {
1017         struct net_device *dev = ptr;
1018         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1019         struct net *net = dev_net(dev);
1020
1021         if (event == NETDEV_UNREGISTER) {
1022                 fib_disable_ip(dev, 2, -1);
1023                 return NOTIFY_DONE;
1024         }
1025
1026         if (!in_dev)
1027                 return NOTIFY_DONE;
1028
1029         switch (event) {
1030         case NETDEV_UP:
1031                 for_ifa(in_dev) {
1032                         fib_add_ifaddr(ifa);
1033                 } endfor_ifa(in_dev);
1034 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1035                 fib_sync_up(dev);
1036 #endif
1037                 atomic_inc(&net->ipv4.dev_addr_genid);
1038                 rt_cache_flush(dev_net(dev), -1);
1039                 break;
1040         case NETDEV_DOWN:
1041                 fib_disable_ip(dev, 0, 0);
1042                 break;
1043         case NETDEV_CHANGEMTU:
1044         case NETDEV_CHANGE:
1045                 rt_cache_flush(dev_net(dev), 0);
1046                 break;
1047         case NETDEV_UNREGISTER_BATCH:
1048                 /* The batch unregister is only called on the first
1049                  * device in the list of devices being unregistered.
1050                  * Therefore we should not pass dev_net(dev) in here.
1051                  */
1052                 rt_cache_flush_batch(NULL);
1053                 break;
1054         }
1055         return NOTIFY_DONE;
1056 }
1057
1058 static struct notifier_block fib_inetaddr_notifier = {
1059         .notifier_call = fib_inetaddr_event,
1060 };
1061
1062 static struct notifier_block fib_netdev_notifier = {
1063         .notifier_call = fib_netdev_event,
1064 };
1065
1066 static int __net_init ip_fib_net_init(struct net *net)
1067 {
1068         int err;
1069         size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ;
1070
1071         /* Avoid false sharing : Use at least a full cache line */
1072         size = max_t(size_t, size, L1_CACHE_BYTES);
1073
1074         net->ipv4.fib_table_hash = kzalloc(size, GFP_KERNEL);
1075         if (net->ipv4.fib_table_hash == NULL)
1076                 return -ENOMEM;
1077
1078         err = fib4_rules_init(net);
1079         if (err < 0)
1080                 goto fail;
1081         return 0;
1082
1083 fail:
1084         kfree(net->ipv4.fib_table_hash);
1085         return err;
1086 }
1087
1088 static void ip_fib_net_exit(struct net *net)
1089 {
1090         unsigned int i;
1091
1092 #ifdef CONFIG_IP_MULTIPLE_TABLES
1093         fib4_rules_exit(net);
1094 #endif
1095
1096         rtnl_lock();
1097         for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1098                 struct fib_table *tb;
1099                 struct hlist_head *head;
1100                 struct hlist_node *node, *tmp;
1101
1102                 head = &net->ipv4.fib_table_hash[i];
1103                 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1104                         hlist_del(node);
1105                         fib_table_flush(tb);
1106                         fib_free_table(tb);
1107                 }
1108         }
1109         rtnl_unlock();
1110         kfree(net->ipv4.fib_table_hash);
1111 }
1112
1113 static int __net_init fib_net_init(struct net *net)
1114 {
1115         int error;
1116
1117         error = ip_fib_net_init(net);
1118         if (error < 0)
1119                 goto out;
1120         error = nl_fib_lookup_init(net);
1121         if (error < 0)
1122                 goto out_nlfl;
1123         error = fib_proc_init(net);
1124         if (error < 0)
1125                 goto out_proc;
1126 out:
1127         return error;
1128
1129 out_proc:
1130         nl_fib_lookup_exit(net);
1131 out_nlfl:
1132         ip_fib_net_exit(net);
1133         goto out;
1134 }
1135
1136 static void __net_exit fib_net_exit(struct net *net)
1137 {
1138         fib_proc_exit(net);
1139         nl_fib_lookup_exit(net);
1140         ip_fib_net_exit(net);
1141 }
1142
1143 static struct pernet_operations fib_net_ops = {
1144         .init = fib_net_init,
1145         .exit = fib_net_exit,
1146 };
1147
1148 void __init ip_fib_init(void)
1149 {
1150         rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
1151         rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
1152         rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
1153
1154         register_pernet_subsys(&fib_net_ops);
1155         register_netdevice_notifier(&fib_netdev_notifier);
1156         register_inetaddr_notifier(&fib_inetaddr_notifier);
1157
1158         fib_trie_init();
1159 }