/*
 * net/core/fib_rules.c         Generic Routing Rules
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License as
 *      published by the Free Software Foundation, version 2.
 *
 * Authors:     Thomas Graf <tgraf@suug.ch>
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/fib_rules.h>

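/* Allocate a rule of ops->rule_size, initialize it as a simple lookup into
 * @table with the given preference and flags, and append it to the ops'
 * rule list. Intended for the initial default rules a protocol installs
 * before its ops become visible.
 */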
int fib_default_rule_add(struct fib_rules_ops *ops,
                         u32 pref, u32 table, u32 flags)
{
        struct fib_rule *r;

        r = kzalloc(ops->rule_size, GFP_KERNEL);
        if (r == NULL)
                return -ENOMEM;

        atomic_set(&r->refcnt, 1);
        r->action = FR_ACT_TO_TBL;
        r->pref = pref;
        r->table = table;
        r->flags = flags;

        /* The lock is not required here, the list is unreachable
         * at the moment this function is called */
        list_add_tail(&r->list, &ops->rules_list);
        return 0;
}
EXPORT_SYMBOL(fib_default_rule_add);

static void notify_rule_change(int event, struct fib_rule *rule,
                               struct fib_rules_ops *ops, struct nlmsghdr *nlh,
                               u32 pid);

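/* Find the fib_rules_ops registered for @family in @net and take a
 * reference on its owning module; the caller must drop it again with
 * rules_ops_put().
 */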
static struct fib_rules_ops *lookup_rules_ops(struct net *net, int family)
{
        struct fib_rules_ops *ops;

        rcu_read_lock();
        list_for_each_entry_rcu(ops, &net->rules_ops, list) {
                if (ops->family == family) {
                        if (!try_module_get(ops->owner))
                                ops = NULL;
                        rcu_read_unlock();
                        return ops;
                }
        }
        rcu_read_unlock();

        return NULL;
}

static void rules_ops_put(struct fib_rules_ops *ops)
{
        if (ops)
                module_put(ops->owner);
}

static void flush_route_cache(struct fib_rules_ops *ops)
{
        if (ops->flush_cache)
                ops->flush_cache();
}

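/* Register a protocol's rule ops with its network namespace. Returns
 * -EINVAL for malformed ops and -EEXIST if ops for the same family are
 * already registered in this namespace.
 */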
int fib_rules_register(struct fib_rules_ops *ops)
{
        int err = -EEXIST;
        struct fib_rules_ops *o;
        struct net *net;

        net = ops->fro_net;

        if (ops->rule_size < sizeof(struct fib_rule))
                return -EINVAL;

        if (ops->match == NULL || ops->configure == NULL ||
            ops->compare == NULL || ops->fill == NULL ||
            ops->action == NULL)
                return -EINVAL;

        spin_lock(&net->rules_mod_lock);
        list_for_each_entry(o, &net->rules_ops, list)
                if (ops->family == o->family)
                        goto errout;

        hold_net(net);
        list_add_tail_rcu(&ops->list, &net->rules_ops);
        err = 0;
errout:
        spin_unlock(&net->rules_mod_lock);

        return err;
}

EXPORT_SYMBOL_GPL(fib_rules_register);

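/* Unlink every rule from the ops' rule list and drop the list's
 * reference on it.
 */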
void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
{
        struct fib_rule *rule, *tmp;

        list_for_each_entry_safe(rule, tmp, &ops->rules_list, list) {
                list_del_rcu(&rule->list);
                fib_rule_put(rule);
        }
}
EXPORT_SYMBOL_GPL(fib_rules_cleanup_ops);

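/* Remove the ops from its namespace, flush all of its rules and wait for
 * concurrent RCU readers before releasing the namespace reference.
 */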
void fib_rules_unregister(struct fib_rules_ops *ops)
{
        struct net *net = ops->fro_net;

        spin_lock(&net->rules_mod_lock);
        list_del_rcu(&ops->list);
        fib_rules_cleanup_ops(ops);
        spin_unlock(&net->rules_mod_lock);

        synchronize_rcu();
        release_net(net);
}

EXPORT_SYMBOL_GPL(fib_rules_unregister);

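/* Check the generic selectors (incoming interface and fwmark) and then the
 * protocol specific match. The result is inverted for FIB_RULE_INVERT rules.
 */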
static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
                          struct flowi *fl, int flags)
{
        int ret = 0;

        if (rule->ifindex && (rule->ifindex != fl->iif))
                goto out;

        if ((rule->mark ^ fl->mark) & rule->mark_mask)
                goto out;

        ret = ops->match(rule, fl, flags);
out:
        return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
}

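/* Walk the rule list in priority order and run the action of the first
 * matching rule that does not return -EAGAIN. GOTO rules jump to their
 * resolved target, NOP rules are skipped. On success a reference to the
 * matched rule is stored in @arg; -ESRCH is returned if nothing matched.
 */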
int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
                     int flags, struct fib_lookup_arg *arg)
{
        struct fib_rule *rule;
        int err;

        rcu_read_lock();

        list_for_each_entry_rcu(rule, &ops->rules_list, list) {
jumped:
                if (!fib_rule_match(rule, ops, fl, flags))
                        continue;

                if (rule->action == FR_ACT_GOTO) {
                        struct fib_rule *target;

                        target = rcu_dereference(rule->ctarget);
                        if (target == NULL) {
                                continue;
                        } else {
                                rule = target;
                                goto jumped;
                        }
                } else if (rule->action == FR_ACT_NOP)
                        continue;
                else
                        err = ops->action(rule, fl, flags, arg);

                if (err != -EAGAIN) {
                        fib_rule_get(rule);
                        arg->rule = rule;
                        goto out;
                }
        }

        err = -ESRCH;
out:
        rcu_read_unlock();

        return err;
}

EXPORT_SYMBOL_GPL(fib_rules_lookup);

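/* Sanity check FRA_SRC/FRA_DST against the header's prefix lengths and the
 * protocol's address size.
 */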
static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
                            struct fib_rules_ops *ops)
{
        int err = -EINVAL;

        if (frh->src_len)
                if (tb[FRA_SRC] == NULL ||
                    frh->src_len > (ops->addr_size * 8) ||
                    nla_len(tb[FRA_SRC]) != ops->addr_size)
                        goto errout;

        if (frh->dst_len)
                if (tb[FRA_DST] == NULL ||
                    frh->dst_len > (ops->addr_size * 8) ||
                    nla_len(tb[FRA_DST]) != ops->addr_size)
                        goto errout;

        err = 0;
errout:
        return err;
}

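/* RTM_NEWRULE handler: parse and validate the netlink request, allocate and
 * configure the new rule, resolve goto targets in both directions, insert
 * the rule in priority order and notify listeners.
 */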
static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
        struct net *net = skb->sk->sk_net;
        struct fib_rule_hdr *frh = nlmsg_data(nlh);
        struct fib_rules_ops *ops = NULL;
        struct fib_rule *rule, *r, *last = NULL;
        struct nlattr *tb[FRA_MAX+1];
        int err = -EINVAL, unresolved = 0;

        if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
                goto errout;

        ops = lookup_rules_ops(net, frh->family);
        if (ops == NULL) {
                err = -EAFNOSUPPORT;
                goto errout;
        }

        err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
        if (err < 0)
                goto errout;

        err = validate_rulemsg(frh, tb, ops);
        if (err < 0)
                goto errout;

        rule = kzalloc(ops->rule_size, GFP_KERNEL);
        if (rule == NULL) {
                err = -ENOMEM;
                goto errout;
        }

        if (tb[FRA_PRIORITY])
                rule->pref = nla_get_u32(tb[FRA_PRIORITY]);

        if (tb[FRA_IFNAME]) {
                struct net_device *dev;

                rule->ifindex = -1;
                nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ);
                dev = __dev_get_by_name(net, rule->ifname);
                if (dev)
                        rule->ifindex = dev->ifindex;
        }

        if (tb[FRA_FWMARK]) {
                rule->mark = nla_get_u32(tb[FRA_FWMARK]);
                if (rule->mark)
                        /* compatibility: if the mark value is non-zero all bits
                         * are compared unless a mask is explicitly specified.
                         */
                        rule->mark_mask = 0xFFFFFFFF;
        }

        if (tb[FRA_FWMASK])
                rule->mark_mask = nla_get_u32(tb[FRA_FWMASK]);

        rule->action = frh->action;
        rule->flags = frh->flags;
        rule->table = frh_get_table(frh, tb);

        if (!rule->pref && ops->default_pref)
                rule->pref = ops->default_pref(ops);

        err = -EINVAL;
        if (tb[FRA_GOTO]) {
                if (rule->action != FR_ACT_GOTO)
                        goto errout_free;

                rule->target = nla_get_u32(tb[FRA_GOTO]);
                /* Backward jumps are prohibited to avoid endless loops */
                if (rule->target <= rule->pref)
                        goto errout_free;

                list_for_each_entry(r, &ops->rules_list, list) {
                        if (r->pref == rule->target) {
                                rule->ctarget = r;
                                break;
                        }
                }

                if (rule->ctarget == NULL)
                        unresolved = 1;
        } else if (rule->action == FR_ACT_GOTO)
                goto errout_free;

        err = ops->configure(rule, skb, nlh, frh, tb);
        if (err < 0)
                goto errout_free;

        list_for_each_entry(r, &ops->rules_list, list) {
                if (r->pref > rule->pref)
                        break;
                last = r;
        }

        fib_rule_get(rule);

        if (ops->unresolved_rules) {
                /*
                 * There are unresolved goto rules in the list, check if
                 * any of them are pointing to this new rule.
                 */
                list_for_each_entry(r, &ops->rules_list, list) {
                        if (r->action == FR_ACT_GOTO &&
                            r->target == rule->pref) {
                                BUG_ON(r->ctarget != NULL);
                                rcu_assign_pointer(r->ctarget, rule);
                                if (--ops->unresolved_rules == 0)
                                        break;
                        }
                }
        }

        if (rule->action == FR_ACT_GOTO)
                ops->nr_goto_rules++;

        if (unresolved)
                ops->unresolved_rules++;

        if (last)
                list_add_rcu(&rule->list, &last->list);
        else
                list_add_rcu(&rule->list, &ops->rules_list);

        notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
        flush_route_cache(ops);
        rules_ops_put(ops);
        return 0;

errout_free:
        kfree(rule);
errout:
        rules_ops_put(ops);
        return err;
}

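/* RTM_DELRULE handler: find the first rule matching all attributes given in
 * the request, unlink it, unresolve any goto rules pointing at it and notify
 * listeners. Permanent rules cannot be deleted.
 */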
static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
        struct net *net = skb->sk->sk_net;
        struct fib_rule_hdr *frh = nlmsg_data(nlh);
        struct fib_rules_ops *ops = NULL;
        struct fib_rule *rule, *tmp;
        struct nlattr *tb[FRA_MAX+1];
        int err = -EINVAL;

        if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
                goto errout;

        ops = lookup_rules_ops(net, frh->family);
        if (ops == NULL) {
                err = -EAFNOSUPPORT;
                goto errout;
        }

        err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
        if (err < 0)
                goto errout;

        err = validate_rulemsg(frh, tb, ops);
        if (err < 0)
                goto errout;

        list_for_each_entry(rule, &ops->rules_list, list) {
                if (frh->action && (frh->action != rule->action))
                        continue;

                if (frh->table && (frh_get_table(frh, tb) != rule->table))
                        continue;

                if (tb[FRA_PRIORITY] &&
                    (rule->pref != nla_get_u32(tb[FRA_PRIORITY])))
                        continue;

                if (tb[FRA_IFNAME] &&
                    nla_strcmp(tb[FRA_IFNAME], rule->ifname))
                        continue;

                if (tb[FRA_FWMARK] &&
                    (rule->mark != nla_get_u32(tb[FRA_FWMARK])))
                        continue;

                if (tb[FRA_FWMASK] &&
                    (rule->mark_mask != nla_get_u32(tb[FRA_FWMASK])))
                        continue;

                if (!ops->compare(rule, frh, tb))
                        continue;

                if (rule->flags & FIB_RULE_PERMANENT) {
                        err = -EPERM;
                        goto errout;
                }

                list_del_rcu(&rule->list);

                if (rule->action == FR_ACT_GOTO)
                        ops->nr_goto_rules--;

                /*
                 * Check if this rule is a target of any goto rules; if so,
                 * mark them as unresolved. As this operation is potentially
                 * very expensive, it is only performed if goto rules have
                 * actually been added.
                 */
                if (ops->nr_goto_rules > 0) {
                        list_for_each_entry(tmp, &ops->rules_list, list) {
                                if (tmp->ctarget == rule) {
                                        rcu_assign_pointer(tmp->ctarget, NULL);
                                        ops->unresolved_rules++;
                                }
                        }
                }

                synchronize_rcu();
                notify_rule_change(RTM_DELRULE, rule, ops, nlh,
                                   NETLINK_CB(skb).pid);
                fib_rule_put(rule);
                flush_route_cache(ops);
                rules_ops_put(ops);
                return 0;
        }

        err = -ENOENT;
errout:
        rules_ops_put(ops);
        return err;
}

static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
                                         struct fib_rule *rule)
{
        size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr))
                         + nla_total_size(IFNAMSIZ) /* FRA_IFNAME */
                         + nla_total_size(4) /* FRA_PRIORITY */
                         + nla_total_size(4) /* FRA_TABLE */
                         + nla_total_size(4) /* FRA_FWMARK */
                         + nla_total_size(4); /* FRA_FWMASK */

        if (ops->nlmsg_payload)
                payload += ops->nlmsg_payload(rule);

        return payload;
}

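/* Fill a rule message into @skb: header fields first, then the generic
 * attributes, then the protocol specific part via ops->fill().
 */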
static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
                            u32 pid, u32 seq, int type, int flags,
                            struct fib_rules_ops *ops)
{
        struct nlmsghdr *nlh;
        struct fib_rule_hdr *frh;

        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags);
        if (nlh == NULL)
                return -EMSGSIZE;

        frh = nlmsg_data(nlh);
        frh->table = rule->table;
        NLA_PUT_U32(skb, FRA_TABLE, rule->table);
        frh->res1 = 0;
        frh->res2 = 0;
        frh->action = rule->action;
        frh->flags = rule->flags;

        if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL)
                frh->flags |= FIB_RULE_UNRESOLVED;

        if (rule->ifname[0]) {
                NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname);

                if (rule->ifindex == -1)
                        frh->flags |= FIB_RULE_DEV_DETACHED;
        }

        if (rule->pref)
                NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref);

        if (rule->mark)
                NLA_PUT_U32(skb, FRA_FWMARK, rule->mark);

        if (rule->mark_mask || rule->mark)
                NLA_PUT_U32(skb, FRA_FWMASK, rule->mark_mask);

        if (rule->target)
                NLA_PUT_U32(skb, FRA_GOTO, rule->target);

        if (ops->fill(rule, skb, nlh, frh) < 0)
                goto nla_put_failure;

        return nlmsg_end(skb, nlh);

nla_put_failure:
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}

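/* Dump the rules of a single ops, continuing at the index stored in
 * cb->args[1] across multiple netlink messages. Drops the caller's
 * reference on @ops before returning.
 */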
static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
                      struct fib_rules_ops *ops)
{
        int idx = 0;
        struct fib_rule *rule;

        list_for_each_entry(rule, &ops->rules_list, list) {
                if (idx < cb->args[1])
                        goto skip;

                if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid,
                                     cb->nlh->nlmsg_seq, RTM_NEWRULE,
                                     NLM_F_MULTI, ops) < 0)
                        break;
skip:
                idx++;
        }
        cb->args[1] = idx;
        rules_ops_put(ops);

        return skb->len;
}

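/* RTM_GETRULE dump handler: dump either a single family or, for AF_UNSPEC,
 * all registered families in this namespace.
 */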
static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net *net = skb->sk->sk_net;
        struct fib_rules_ops *ops;
        int idx = 0, family;

        family = rtnl_msg_family(cb->nlh);
        if (family != AF_UNSPEC) {
                /* Protocol specific dump request */
                ops = lookup_rules_ops(net, family);
                if (ops == NULL)
                        return -EAFNOSUPPORT;

                return dump_rules(skb, cb, ops);
        }

        rcu_read_lock();
        list_for_each_entry_rcu(ops, &net->rules_ops, list) {
                if (idx < cb->args[0] || !try_module_get(ops->owner))
                        goto skip;

                if (dump_rules(skb, cb, ops) < 0)
                        break;

                cb->args[1] = 0;
        skip:
                idx++;
        }
        rcu_read_unlock();
        cb->args[0] = idx;

        return skb->len;
}

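/* Build an RTM_NEWRULE/RTM_DELRULE message for @rule and send it to the
 * protocol's netlink notification group; on failure the error is reported
 * via rtnl_set_sk_err().
 */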
static void notify_rule_change(int event, struct fib_rule *rule,
                               struct fib_rules_ops *ops, struct nlmsghdr *nlh,
                               u32 pid)
{
        struct net *net;
        struct sk_buff *skb;
        int err = -ENOBUFS;

        net = ops->fro_net;
        skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL);
        if (skb == NULL)
                goto errout;

        err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops);
        if (err < 0) {
                /* -EMSGSIZE implies BUG in fib_rule_nlmsg_size() */
                WARN_ON(err == -EMSGSIZE);
                kfree_skb(skb);
                goto errout;
        }

        err = rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
errout:
        if (err < 0)
                rtnl_set_sk_err(net, ops->nlgroup, err);
}

static void attach_rules(struct list_head *rules, struct net_device *dev)
{
        struct fib_rule *rule;

        list_for_each_entry(rule, rules, list) {
                if (rule->ifindex == -1 &&
                    strcmp(dev->name, rule->ifname) == 0)
                        rule->ifindex = dev->ifindex;
        }
}

static void detach_rules(struct list_head *rules, struct net_device *dev)
{
        struct fib_rule *rule;

        list_for_each_entry(rule, rules, list)
                if (rule->ifindex == dev->ifindex)
                        rule->ifindex = -1;
}

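/* netdevice notifier: bind rules referencing an interface name to its
 * ifindex on NETDEV_REGISTER and detach them on NETDEV_UNREGISTER.
 */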
static int fib_rules_event(struct notifier_block *this, unsigned long event,
                            void *ptr)
{
        struct net_device *dev = ptr;
        struct net *net = dev->nd_net;
        struct fib_rules_ops *ops;

        ASSERT_RTNL();
        rcu_read_lock();

        switch (event) {
        case NETDEV_REGISTER:
                list_for_each_entry(ops, &net->rules_ops, list)
                        attach_rules(&ops->rules_list, dev);
                break;

        case NETDEV_UNREGISTER:
                list_for_each_entry(ops, &net->rules_ops, list)
                        detach_rules(&ops->rules_list, dev);
                break;
        }

        rcu_read_unlock();

        return NOTIFY_DONE;
}

static struct notifier_block fib_rules_notifier = {
        .notifier_call = fib_rules_event,
};

static int fib_rules_net_init(struct net *net)
{
        INIT_LIST_HEAD(&net->rules_ops);
        spin_lock_init(&net->rules_mod_lock);
        return 0;
}

static struct pernet_operations fib_rules_net_ops = {
        .init = fib_rules_net_init,
};

static int __init fib_rules_init(void)
{
        int err;

        rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL);
        rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL);
        rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule);

        err = register_netdevice_notifier(&fib_rules_notifier);
        if (err < 0)
                goto fail;

        err = register_pernet_subsys(&fib_rules_net_ops);
        if (err < 0)
                goto fail_unregister;
        return 0;

fail_unregister:
        unregister_netdevice_notifier(&fib_rules_notifier);
fail:
        rtnl_unregister(PF_UNSPEC, RTM_NEWRULE);
        rtnl_unregister(PF_UNSPEC, RTM_DELRULE);
        rtnl_unregister(PF_UNSPEC, RTM_GETRULE);
        return err;
}

subsys_initcall(fib_rules_init);